Source code for mani_skill.envs.tasks.rotate_cube

from typing import Any, Tuple

import numpy as np
import torch
import torch.random

from mani_skill import PACKAGE_ASSET_DIR
from mani_skill.agents.robots import TriFingerPro
from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.envs.utils.randomization.pose import random_quaternions
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import common, sapien_utils
from mani_skill.utils.building import ActorBuilder, actors
from mani_skill.utils.building.ground import build_ground
from mani_skill.utils.registration import register_env
from mani_skill.utils.structs import Actor, Articulation, Pose
from mani_skill.utils.structs.types import Array, GPUMemoryConfig, SimConfig


[docs]class RotateCubeEnv(BaseEnv): """ Modified from https://github.com/NVIDIA-Omniverse/IsaacGymEnvs/blob/main/isaacgymenvs/tasks/trifinger.py https://github.com/NVIDIA-Omniverse/IsaacGymEnvs/blob/main/isaacgymenvs/cfg/task/Trifinger.yaml """
[docs] SUPPORTED_ROBOTS = ["trifingerpro"]
# Specify some supported robot types
[docs] agent: TriFingerPro
# Specify default simulation/gpu memory configurations.
[docs] sim_config = SimConfig( gpu_memory_config=GPUMemoryConfig( found_lost_pairs_capacity=2**25, max_rigid_patch_count=2**18 ) )
# set some commonly used values
[docs] goal_radius = 0.02
[docs] cube_half_size = 0.02
# radius of the area
[docs] ARENA_RADIUS = 0.195
[docs] size = 0.065 # m
[docs] max_len = 0.065
# 3D radius of the cuboid
[docs] radius_3d = max_len * np.sqrt(3) / 2
# compute distance from wall to the center
[docs] max_com_distance_to_center = ARENA_RADIUS - radius_3d
# minimum and maximum height for spawning the object
[docs] min_height = 0.065 / 2
[docs] max_height = 0.1
def __init__( self, *args, robot_uids="trifingerpro", robot_init_qpos_noise=0.02, difficulty_level: int = 4, **kwargs, ):
[docs] self.robot_init_qpos_noise = robot_init_qpos_noise
if ( not isinstance(difficulty_level, int) or difficulty_level >= 5 or difficulty_level < 0 ): raise ValueError( f"Difficulty level must be a int within 0-4, but get {difficulty_level}" )
[docs] self.difficulty_level = difficulty_level
super().__init__(*args, robot_uids=robot_uids, **kwargs) @property
[docs] def _default_sensor_configs(self): pose = sapien_utils.look_at(eye=(0.7, 0.0, 0.7), target=(0.0, 0.0, 0.0)) return [CameraConfig("base_camera", pose, 128, 128, np.pi / 2, 0.01, 100)]
@property
[docs] def _default_human_render_camera_configs(self): pose = sapien_utils.look_at(eye=(0.7, 0.0, 0.7), target=(0.0, 0.0, 0.0)) return CameraConfig("render_camera", pose, 512, 512, 1, 0.01, 100)
[docs] def _load_scene(self, options: dict): self.ground = build_ground(self.scene, altitude=0) loader1 = self.scene.create_urdf_loader() loader1.fix_root_link = True loader1.name = "table" urdf_path = f"{PACKAGE_ASSET_DIR}/robots/trifinger/table_without_border.urdf" table: Articulation = loader1.load(urdf_path) builder: ActorBuilder = self.scene.create_actor_builder() high_table_boundary_file_name = f"{PACKAGE_ASSET_DIR}/robots/trifinger/robot_properties_fingers/meshes/high_table_boundary.stl" builder.add_nonconvex_collision_from_file( filename=high_table_boundary_file_name, scale=[1, 1, 1], material=None ) builder.add_visual_from_file(filename=high_table_boundary_file_name) table_boundary: Actor = builder.build_static("table2") self.obj = actors.build_colorful_cube( self.scene, half_size=self.size / 2, color=np.array([169, 42, 12, 255]) / 255, name="cube", body_type="dynamic", add_collision=True, ) self.obj_goal = actors.build_colorful_cube( self.scene, half_size=self.size / 2, color=np.array([12, 160, 42, 255]) / 255, name="cube_goal", body_type="kinematic", add_collision=False, ) self._hidden_objects.append(self.obj_goal)
[docs] def _initialize_actors(self, env_idx: torch.Tensor): with torch.device(self.device): b = len(env_idx) xyz = torch.zeros((b, 3)) xyz[..., 2] = self.size / 2 + 0.005 obj_pose = Pose.create_from_pq(p=xyz, q=[1, 0, 0, 0]) self.obj.set_pose(obj_pose) pos, orn = self._sample_object_goal_poses( env_idx, difficulty=self.difficulty_level ) self.obj_goal.set_pose(Pose.create_from_pq(p=pos, q=orn)) self.prev_norms = None
[docs] def _initialize_episode(self, env_idx: torch.Tensor, options: dict): self._initialize_actors(env_idx) self._initialize_agent(env_idx)
[docs] def _sample_object_goal_poses(self, env_idx: torch.Tensor, difficulty: int): """Sample goal poses for the cube and sets them into the desired goal pose buffer. Args: instances: A tensor constraining indices of environment instances to reset. difficulty: Difficulty level. The higher, the more difficult is the goal. Possible levels are: - 0: Random goal position on the table, no orientation. - 1: Random goal position on the table, including yaw orientation. - 2: Fixed goal position in the air with x,y = 0. No orientation. - 3: Random goal position in the air, no orientation. - 4: Random goal pose in the air, including orientation. """ b = len(env_idx) default_orn = torch.tensor( [1.0, 0.0, 0.0, 0.0], dtype=torch.float, device=self.device ).repeat(b, 1) def random_xy() -> Tuple[torch.Tensor, torch.Tensor]: """Returns sampled uniform positions in circle (https://stackoverflow.com/a/50746409)""" # sample radius of circle radius = torch.sqrt(torch.rand(b, dtype=torch.float, device=self.device)) radius *= self.max_com_distance_to_center # sample theta of point theta = 2 * np.pi * torch.rand(b, dtype=torch.float, device=self.device) # x,y-position of the cube x = radius * torch.cos(theta) y = radius * torch.sin(theta) return x, y def random_z(min_height: float, max_height: float) -> torch.Tensor: """Returns sampled height of the goal object.""" z = torch.rand(b, dtype=torch.float, device=self.device) z = (max_height - min_height) * z + min_height return z if difficulty == 0: # Random goal position on the table, no orientation. pos_x, pos_y = random_xy() pos_z = self.size / 2 orientation = default_orn elif difficulty == 1: # For initialization pos_x, pos_y = random_xy() pos_z = self.size / 2 orientation = random_quaternions( b, lock_x=True, lock_y=True, device=self.device ) elif difficulty == 2: # Fixed goal position in the air with x,y = 0. No orientation. pos_x, pos_y = 0.0, 0.0 pos_z = self.min_height + 0.05 orientation = default_orn elif difficulty == 3: # Random goal position in the air, no orientation. pos_x, pos_y = random_xy() pos_z = random_z(min_height=self.min_height, max_height=self.max_height) orientation = default_orn elif difficulty == 4: # Random goal pose in the air, including orientation. # Note: Set minimum height such that the cube does not intersect with the # ground in any orientation # pick x, y, z according to the maximum height / radius at the current point # in the cirriculum pos_x, pos_y = random_xy() pos_z = random_z(min_height=self.radius_3d, max_height=self.max_height) orientation = random_quaternions(b, device=self.device) else: msg = f"Invalid difficulty index for task: {difficulty}." raise ValueError(msg) pos_tensor = torch.zeros((b, 3), dtype=torch.float, device=self.device) pos_tensor[:, 0] = pos_x pos_tensor[:, 1] = pos_y pos_tensor[:, 2] = pos_z return pos_tensor, orientation
[docs] def evaluate(self): obj_p = self.obj.pose.p goal_p = self.obj_goal.pose.p obj_q = self.obj.pose.q goal_q = self.obj_goal.pose.q is_obj_pos_close_to_goal = ( torch.linalg.norm(obj_p - goal_p, axis=1) < self.goal_radius ) is_obj_q_close_to_goal = common.quat_diff_rad(obj_q, goal_q) < 0.1 is_success = is_obj_pos_close_to_goal & is_obj_q_close_to_goal return { "success": is_success, }
[docs] def _initialize_agent(self, env_idx: torch.Tensor): with torch.device(self.device): b = len(env_idx) dof = self.agent.robot.dof if isinstance(dof, torch.Tensor): dof = dof[0] init_qpos = torch.zeros((b, dof)) init_qpos += torch.randn((b, dof)) * self.robot_init_qpos_noise self.agent.reset(init_qpos) self.agent.robot.set_pose( Pose.create_from_pq( torch.tensor([0.0, 0, self.size / 2 + 0.022]), torch.tensor([1, 0, 0, 0]), ) )
[docs] def _get_obs_extra(self, info: dict): obs = dict( goal_pos=self.obj_goal.pose.p, goal_q=self.obj_goal.pose.q, ) if self.obs_mode_struct.use_state: obs.update( obj_p=self.obj.pose.p, obj_q=self.obj.pose.q, ) return obs
[docs] def compute_dense_reward(self, obs: Any, action: Array, info: dict): obj_pos = self.obj.pose.p obj_q = self.obj.pose.q goal_pos = self.obj_goal.pose.p goal_q = self.obj_goal.pose.q object_dist_weight = 5 object_rot_weight = 5 # Reward penalising finger movement tip_poses = self.agent.tip_poses # shape (N, 3 + 4, 3 fingers) finger_reach_object_dist_1 = torch.norm( tip_poses[:, :3, 0] - obj_pos, p=2, dim=-1 ) finger_reach_object_dist_2 = torch.norm( tip_poses[:, :3, 1] - obj_pos, p=2, dim=-1 ) finger_reach_object_dist_3 = torch.norm( tip_poses[:, :3, 2] - obj_pos, p=2, dim=-1 ) finger_reach_object_reward1 = 1 - torch.tanh(5 * finger_reach_object_dist_1) finger_reach_object_reward2 = 1 - torch.tanh(5 * finger_reach_object_dist_2) finger_reach_object_reward3 = 1 - torch.tanh(5 * finger_reach_object_dist_3) finger_reach_object_reward = ( object_dist_weight * ( finger_reach_object_reward1 + finger_reach_object_reward2 + finger_reach_object_reward3 ) / 3 ) # Reward for object distance object_dist = torch.norm(obj_pos - goal_pos, p=2, dim=-1) init_xyz_tensor = torch.tensor( [0, 0, 0.032], dtype=torch.float, device=self.device ).reshape(1, 3) init_z_dist = torch.norm( init_xyz_tensor - goal_pos[ ..., ], p=2, dim=-1, ) # object_dist_reward = object_dist_weight * dt * lgsk_kernel(object_dist, scale=50., eps=2.) object_dist_reward = 1 - torch.tanh(5 * object_dist) object_init_dist_reward = 1 - torch.tanh(5 * init_z_dist) object_dist_reward -= object_init_dist_reward init_z_tensor = torch.tensor( [0.032], dtype=torch.float, device=self.device ).reshape(1, 1) object_z_dist = torch.norm(obj_pos[..., 2:3] - goal_pos[..., 2:3], p=2, dim=-1) init_z_dist = torch.norm(init_z_tensor - goal_pos[..., 2:3], p=2, dim=-1) object_lift_reward = 5 * ((1 - torch.tanh(5 * object_z_dist))) object_init_z_reward = 5 * ((1 - torch.tanh(5 * init_z_dist))) object_lift_reward -= object_init_z_reward # extract quaternion orientation angles = common.quat_diff_rad(obj_q, goal_q) object_rot_reward = -1 * torch.abs(angles) pose_reward = ( object_dist_weight * (object_dist_reward + object_lift_reward) + object_rot_weight * object_rot_reward ) total_reward = finger_reach_object_reward + pose_reward total_reward = total_reward.clamp(-15, 15) total_reward[info["success"]] = 15 return total_reward
[docs] def compute_normalized_dense_reward(self, obs: Any, action: Array, info: dict): self.max_reward = 15 dense_reward = self.compute_dense_reward(obs=obs, action=action, info=info) norm_dense_reward = dense_reward / (2 * self.max_reward) + 0.5 return norm_dense_reward
@register_env("TriFingerRotateCubeLevel0-v1", max_episode_steps=250)
[docs]class RotateCubeEnvLevel0(RotateCubeEnv): def __init__(self, *args, **kwargs): super().__init__( *args, robot_init_qpos_noise=0.02, difficulty_level=0, **kwargs, )
@register_env("TriFingerRotateCubeLevel1-v1", max_episode_steps=250)
[docs]class RotateCubeEnvLevel1(RotateCubeEnv): def __init__(self, *args, **kwargs): super().__init__( *args, robot_init_qpos_noise=0.02, difficulty_level=1, **kwargs, )
@register_env("TriFingerRotateCubeLevel2-v1", max_episode_steps=250)
[docs]class RotateCubeEnvLevel2(RotateCubeEnv): def __init__(self, *args, **kwargs): super().__init__( *args, robot_init_qpos_noise=0.02, difficulty_level=2, **kwargs, )
@register_env("TriFingerRotateCubeLevel3-v1", max_episode_steps=250)
[docs]class RotateCubeEnvLevel3(RotateCubeEnv): def __init__(self, *args, **kwargs): super().__init__( *args, robot_init_qpos_noise=0.02, difficulty_level=3, **kwargs, )
@register_env("TriFingerRotateCubeLevel4-v1", max_episode_steps=250)
[docs]class RotateCubeEnvLevel4(RotateCubeEnv): def __init__(self, *args, **kwargs): super().__init__( *args, robot_init_qpos_noise=0.02, difficulty_level=4, **kwargs, )