Source code for mani_skill.envs.tasks.humanoid.transport_box

import copy
import os
from pathlib import Path
from typing import Any

import numpy as np
import sapien
import torch
from transforms3d.euler import euler2quat

from mani_skill.agents.robots.unitree_g1.g1_upper_body import (
    UnitreeG1UpperBodyWithHeadCamera,
)
from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.envs.utils import randomization
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import common, sapien_utils
from mani_skill.utils.building import ground
from mani_skill.utils.registration import register_env
from mani_skill.utils.structs.pose import Pose
from mani_skill.utils.structs.types import GPUMemoryConfig, SceneConfig, SimConfig


@register_env("UnitreeG1TransportBox-v1", max_episode_steps=100)
[docs]class TransportBoxEnv(BaseEnv): """ **Task Description:** A G1 humanoid robot must find a box on a table and transport it to the other table and place it there. **Randomizations:** - the box's xy position is randomized in the region [-0.05, -0.05] x [0.2, 0.05] - the box's z-axis rotation is randomized to a random angle in [0, np.pi/6] **Success Conditions:** - the box is resting on top of the other table """
[docs] SUPPORTED_ROBOTS = ["unitree_g1_simplified_upper_body_with_head_camera"]
[docs] agent: UnitreeG1UpperBodyWithHeadCamera
def __init__(self, *args, **kwargs):
[docs] self.init_robot_pose = copy.deepcopy( UnitreeG1UpperBodyWithHeadCamera.keyframes["standing"].pose )
self.init_robot_pose.p = [-0.1, 0, 0.755]
[docs] self.init_robot_qpos = UnitreeG1UpperBodyWithHeadCamera.keyframes[ "standing" ].qpos.copy()
self.init_robot_qpos[4] = -1.25 self.init_robot_qpos[3] = 1.25 super().__init__( *args, robot_uids="unitree_g1_simplified_upper_body_with_head_camera", **kwargs ) @property
[docs] def _default_sim_config(self): return SimConfig( gpu_memory_config=GPUMemoryConfig( max_rigid_contact_count=2**22, max_rigid_patch_count=2**21 ), scene_config=SceneConfig(contact_offset=0.02), )
@property
[docs] def _default_sensor_configs(self): pose = sapien_utils.look_at([1.0, 0.0, 1.6], [0, 0.0, 0.65]) return [ CameraConfig("base_camera", pose=pose, width=128, height=128, fov=np.pi / 3) ]
@property
[docs] def _default_human_render_camera_configs(self): pose = sapien_utils.look_at([1.0, 0.0, 1.6], [0, 0.0, 0.65]) return CameraConfig( "render_camera", pose=pose, width=512, height=512, fov=np.pi / 3 )
[docs] def _load_agent(self, options: dict): super()._load_agent(options, sapien.Pose(p=[0, 0, 1]))
[docs] def _load_scene(self, options: dict): self.ground = ground.build_ground(self.scene, mipmap_levels=7) # build two tables model_dir = Path( os.path.join( os.path.dirname(__file__), "../../../utils/scene_builder/table/assets" ) ) table_model_file = str(model_dir / "table.glb") scale = 1.2 table_pose = sapien.Pose(q=euler2quat(0, 0, np.pi / 2)) builder = self.scene.create_actor_builder() builder.add_visual_from_file( filename=table_model_file, scale=[scale] * 3, pose=sapien.Pose(q=euler2quat(0, 0, np.pi / 2)), ) builder.add_box_collision( pose=sapien.Pose(p=[0, 0, 0.630612274 / 2]), half_size=(1.658057143 / 2, 0.829028571 / 2, 0.630612274 / 2), ) builder.add_visual_from_file( filename=table_model_file, scale=[scale] * 3, pose=table_pose ) builder.initial_pose = sapien.Pose(p=[0, 0.66, 0]) self.table_1 = builder.build_static(name="table-1") builder = self.scene.create_actor_builder() builder.add_visual_from_file( filename=table_model_file, scale=[scale] * 3, pose=sapien.Pose(q=euler2quat(0, 0, np.pi / 2)), ) builder.add_box_collision( pose=sapien.Pose(p=[0, 0, 0.630612274 / 2]), half_size=(1.658057143 / 2, 0.829028571 / 2, 0.630612274 / 2), ) builder.add_visual_from_file( filename=table_model_file, scale=[scale] * 3, pose=table_pose ) builder.initial_pose = sapien.Pose(p=[0, -0.66, 0]) self.table_2 = builder.build_static(name="table-2") builder = self.scene.create_actor_builder() builder.add_box_collision(half_size=(0.18, 0.12, 0.12), density=200) visual_file = os.path.join( os.path.dirname(__file__), "assets/cardboard_box/textured.obj" ) builder.add_visual_from_file( filename=visual_file, scale=[0.12] * 3, pose=sapien.Pose(q=euler2quat(0, 0, np.pi / 2)), ) builder.initial_pose = sapien.Pose(p=[-0.1, -0.37, 0.7508]) self.box = builder.build(name="box")
[docs] def _initialize_episode(self, env_idx: torch.Tensor, options: dict): with torch.device(self.device): b = len(env_idx) self.agent.robot.set_qpos(self.init_robot_qpos) self.agent.robot.set_pose(self.init_robot_pose) xyz = torch.zeros((b, 3)) xyz[:, 2] = 0.7508 xyz[:, 0] = randomization.uniform(-0.05, 0.2, size=(b,)) xyz[:, 1] = randomization.uniform(-0.05, 0.05, size=(b,)) xyz[:, :2] += torch.tensor([-0.1, -0.37]) quat = randomization.random_quaternions( n=b, device=self.device, lock_x=True, lock_y=True, bounds=(0, np.pi / 6) ) self.box.set_pose(Pose.create_from_pq(xyz, quat))
[docs] def evaluate(self): # left_hand_grasped_box = self.agent.left_hand_is_grasping(self.box, max_angle=110) # right_hand_grasped_box = self.agent.right_hand_is_grasping(self.box, max_angle=110) l_contact_forces = ( ( self.scene.get_pairwise_contact_forces( self.agent.robot.links_map["left_five_link"], self.box ) + self.scene.get_pairwise_contact_forces( self.agent.robot.links_map["left_three_link"], self.box ) + self.scene.get_pairwise_contact_forces( self.agent.robot.links_map["left_palm_link"], self.box ) ) .abs() .sum(dim=1) ) r_contact_forces = ( ( self.scene.get_pairwise_contact_forces( self.agent.robot.links_map["right_five_link"], self.box ) + self.scene.get_pairwise_contact_forces( self.agent.robot.links_map["right_three_link"], self.box ) + self.scene.get_pairwise_contact_forces( self.agent.robot.links_map["right_palm_link"], self.box ) ) .abs() .sum(dim=1) ) left_hand_hit_box = l_contact_forces > 10 right_hand_hit_box = r_contact_forces > 10 # is grasping the box if both hands contact the box and the tcp of the hands are below the grasp points on the box. box_grasped = ( left_hand_hit_box & right_hand_hit_box & ( self.agent.right_tcp.pose.p[:, 2] < self.box_right_grasp_point.p[:, 2] + 0.04 ) & ( self.agent.left_tcp.pose.p[:, 2] < self.box_left_grasp_point.p[:, 2] + 0.04 ) ) # simply requires box to be resting somewhere on the correct table box_at_correct_table_z = (0.751 > self.box.pose.p[:, 2]) & ( self.box.pose.p[:, 2] > 0.750 ) box_at_correct_table_xy = ( (0.78 > self.box.pose.p[:, 0]) & (self.box.pose.p[:, 0] > -0.78) & (1.0 > self.box.pose.p[:, 1]) & (self.box.pose.p[:, 1] > 0.3) ) # box_at_correct_table = torch.linalg.norm(self.box.pose.p - torch.tensor([0, 0.66, 0.731], device=self.device), dim=1) < 0.05 box_at_correct_table = box_at_correct_table_z & box_at_correct_table_xy facing_table_with_box = (-1.7 < self.agent.robot.qpos[:, 0]) & ( self.agent.robot.qpos[:, 0] < -1.4 ) # in this range the robot is probably facing the box on the left table. return { "success": ~box_grasped & box_at_correct_table, "left_hand_hit_box": l_contact_forces > 0, "right_hand_hit_box": r_contact_forces > 0, "box_grasped": box_grasped, "box_at_correct_table_xy": box_at_correct_table_xy, "facing_table_with_box": facing_table_with_box, }
[docs] def _get_obs_extra(self, info: dict): obs = dict( right_tcp_pose=self.agent.right_tcp.pose.raw_pose, left_tcp_pose=self.agent.left_tcp.pose.raw_pose, ) if "state" in self.obs_mode: obs.update( box_pose=self.box.pose.raw_pose, right_tcp_to_box_pos=self.box.pose.p - self.agent.right_tcp.pose.p, left_tcp_to_box_pos=self.box.pose.p - self.agent.left_tcp.pose.p, ) return obs
@property
[docs] def box_right_grasp_point(self): return self.box.pose * Pose.create_from_pq( torch.tensor([-0.165, 0.07, 0.05], device=self.device) )
@property
[docs] def box_left_grasp_point(self): return self.box.pose * Pose.create_from_pq( torch.tensor([0.165, 0.07, 0.05], device=self.device) )
[docs] def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict): # Stage 1, move to face the box on the table. Succeeds if facing_table_with_box reward = 1 - torch.tanh((self.agent.robot.qpos[:, 0] + 1.4).abs()) # Stage 2, grasp the box stably. Succeeds if box_grasped # encourage arms to go down essentially and for tcps to be close to the edge of the box stage_2_reward = ( 1 + (1 - torch.tanh((self.agent.robot.qpos[:, 3]).abs())) / 4 + (1 - torch.tanh((self.agent.robot.qpos[:, 4]).abs())) / 4 + ( 1 - torch.tanh( 3 * torch.linalg.norm( self.agent.right_tcp.pose.p - self.box_right_grasp_point.p, dim=1, ) ) ) / 4 + ( 1 - torch.tanh( 3 * torch.linalg.norm( self.agent.left_tcp.pose.p - self.box_left_grasp_point.p, dim=1 ) ) ) / 4 ) reward[info["facing_table_with_box"]] = stage_2_reward[ info["facing_table_with_box"] ] # Stage 3 transport box to above the other table, Succeeds if box_at_correct_table_xy stage_3_reward = ( 2 + 1 - torch.tanh((self.agent.robot.qpos[:, 0] - 1.4).abs() / 5) ) reward[info["box_grasped"]] = stage_3_reward[info["box_grasped"]] # Stage 4 let go of the box. Succeeds if success (~box_grasped & box_at_correct_table) stage_4_reward = ( 3 + (1 - torch.tanh((self.agent.robot.qpos[:, 3] - 1.25).abs())) / 2 + (1 - torch.tanh((self.agent.robot.qpos[:, 4] + 1.25).abs())) / 2 ) reward[info["box_at_correct_table_xy"]] = stage_4_reward[ info["box_at_correct_table_xy"] ] # encourage agent to stay close to a target qposition? reward[info["success"]] = 5 return reward
[docs] def compute_normalized_dense_reward( self, obs: Any, action: torch.Tensor, info: dict ): return self.compute_dense_reward(obs, action, info) / 5