Source code for mani_skill.envs.tasks.control.humanoid

"""Adapted from https://github.com/google-deepmind/dm_control/blob/main/dm_control/suite/humanoid.py"""

from typing import Any, Union

import numpy as np
import sapien
import torch

from mani_skill.agents.robots.humanoid import Humanoid
from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.envs.utils import randomization, rewards
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import common, sapien_utils
from mani_skill.utils.building.ground import build_ground
from mani_skill.utils.geometry import rotation_conversions
from mani_skill.utils.registration import register_env
from mani_skill.utils.structs.pose import Pose
from mani_skill.utils.structs.types import Array, SceneConfig, SimConfig

# dm_control humanoid reward targets
[docs]_STAND_HEIGHT = 1.4
[docs]_WALK_SPEED = 1
[docs]_RUN_SPEED = 10
[docs]class HumanoidEnvBase(BaseEnv):
[docs] agent: Union[Humanoid]
def __init__(self, *args, robot_uids="humanoid", **kwargs): super().__init__(*args, robot_uids=robot_uids, **kwargs) @property
[docs] def _default_sim_config(self): return SimConfig( scene_config=SceneConfig( solver_position_iterations=4, solver_velocity_iterations=1 ), spacing=20, sim_freq=200, control_freq=40, )
@property
[docs] def _default_sensor_configs(self): return [ CameraConfig( uid="side_cam", pose=sapien_utils.look_at(eye=[0, -3, 1], target=[0, 0, 0]), width=128, height=128, fov=60 * np.pi / 180, near=0.01, far=100, mount=self.camera_mount, ), ]
@property
[docs] def _default_human_render_camera_configs(self): return [ CameraConfig( uid="training_side_vis", pose=sapien_utils.look_at(eye=[0, -3, 1], target=[0, 0, 0]), width=512, height=512, fov=60 * np.pi / 180, near=0.01, far=100, mount=self.camera_mount, ), ]
# reset the mounted camera - camera only follows torso pos, not orientation
[docs] def _before_control_step(self): self.camera_mount.set_pose( Pose.create_from_pq(p=self.agent.robot.links_map["torso"].pose.p) ) if self.gpu_sim_enabled: # we update just actor pose here, no need to call apply_all/fetch_all self.scene.px.gpu_apply_rigid_dynamic_data() self.scene.px.gpu_fetch_rigid_dynamic_data()
@property
[docs] def head_height(self): """Returns the height of the head.""" return self.agent.robot.links_map["head"].pose.p[:, -1]
[docs] def torso_upright(self, info): return info["torso_xmat"][:, 2, 2]
[docs] def torso_vertical_orientation(self, info): return info["torso_xmat"][:, 2, :3].view(-1, 3)
[docs] def extremities(self, info): torso_frame = info["torso_xmat"][:, :3, :3].view(-1, 3, 3) torso_pos = self.agent.robot.links_map["torso"].pose.p positions = [] for side in ("left_", "right_"): for limb in ("hand", "foot"): torso_to_limb = ( self.agent.robot.links_map[side + limb].pose.p - torso_pos ).view(-1, 1, 3) positions.append( (torso_to_limb @ torso_frame).view(-1, 3) ) # reverse order mult == extrems in torso frame return torch.stack(positions, dim=1).view(-1, 12) # (b, 4, 3) -> (b,12)
@property
[docs] def center_of_mass_velocity(self): # """Returns the center of mass velocity of robot""" vels = torch.stack( [link.get_linear_velocity() for link in self.active_links], dim=0 ) # (num_links, b, 3) vels *= self.robot_links_mass.view(-1, 1, 1) com_vel = vels.sum(dim=0) / self.robot_mass # (b, 3) return com_vel
# cache re-used computation
[docs] def evaluate(self) -> dict: return dict( torso_xmat=self.agent.robot.links_map[ "torso" ].pose.to_transformation_matrix(), cmass_linvel=self.center_of_mass_velocity, )
[docs] def _load_scene(self, options: dict): loader = self.scene.create_mjcf_loader() actor_builders = loader.parse(self.agent.mjcf_path)["actor_builders"] for a in actor_builders: a.build(a.name) self.ground = build_ground(self.scene, floor_width=500) # allow tracking of humanoid self.camera_mount = self.scene.create_actor_builder().build_kinematic( "camera_mount" ) # cache for com velocity calc - doing so gives + 10 fps boost for 1024 envs self.active_links = [ link for link in self.agent.robot.get_links() if "dummy" not in link.name ] self.robot_links_mass = torch.stack( [link.mass[0] for link in self.active_links] ).to(self.device) self.robot_mass = torch.sum(self.robot_links_mass).item()
# humanoid reward components used across all tasks
[docs] def control_rew(self, action: Array): return ( rewards.tolerance(action, margin=1, value_at_margin=0, sigmoid="quadratic") .mean(dim=-1) .view(-1) ) # (b, a) -> (b)
[docs] def dont_move_rew(self, info): return ( rewards.tolerance(info["cmass_linvel"][:, :2], margin=2) .mean(dim=-1) .view(-1) ) # (b,3) -> (b)
[docs] def move_rew(self, info, move_speed=10): com_vel = torch.linalg.norm(info["cmass_linvel"][:, :2], dim=-1) return ( rewards.tolerance( com_vel, lower=move_speed, upper=np.inf, margin=move_speed, value_at_margin=0, sigmoid="linear", ) .mean(dim=-1) .view(-1) ) # (b,3) -> (b)
[docs] def standing_rew(self): return rewards.tolerance( self.head_height, lower=_STAND_HEIGHT, upper=float("inf"), margin=_STAND_HEIGHT / 4, ).view(-1)
[docs] def upright_rew(self, info: dict): return rewards.tolerance( self.torso_upright(info), lower=0.9, upper=float("inf"), sigmoid="linear", margin=1.9, value_at_margin=0, ).view(-1)
###
[docs]class HumanoidEnvStandard(HumanoidEnvBase):
[docs] SUPPORTED_REWARD_MODES = ("normalized_dense", "dense", "none")
[docs] agent: Union[Humanoid]
def __init__(self, *args, robot_uids="humanoid", **kwargs): super().__init__(*args, robot_uids=robot_uids, **kwargs)
[docs] def _get_obs_state_dict(self, info: dict): # our qpos model doesn't include the free joint, meaning qpos and qvel are 21 dims, not 27 # global dpos/dt and root (torso) dquaterion/dt are lost as result # we replace them with linear root linvel and root angularvel (equivalent info) return dict( agent=self._get_obs_agent(), # (b, 21*2) root joint not included in our qpos root_vel=self.agent.robot.links_map[ "dummy_root_0" ].get_linear_velocity(), # free joint info, (b, 3) root_quat_vel=self.agent.robot.links_map[ "dummy_root_0" ].get_angular_velocity(), # free joint info, (b, 3) head_height=self.head_height, # (b,1) com_velocity=info["cmass_linvel"], # (b, 3) extremities=self.extremities(info), link_linvels=torch.stack( [link.get_linear_velocity() for link in self.active_links], dim=1 ).view(-1, 16 * 3), link_angvels=torch.stack( [link.get_angular_velocity() for link in self.active_links], dim=1 ).view(-1, 16 * 3), qfrc=self.agent.robot.get_qf(), orient=self.agent.robot.links_map["dummy_root_0"].pose.q, )
# standard humanoid env resets if torso is below a certain point # therefore, we disable all contacts except feet and floor
[docs] def _load_scene(self, options: dict): super()._load_scene(options) self.ground.set_collision_group_bit(group=2, bit_idx=30, bit=1) for link in self.active_links: if not "foot" in link.name: link.set_collision_group_bit(group=2, bit_idx=30, bit=1)
[docs] def _initialize_episode(self, env_idx: torch.Tensor, options: dict): with torch.device(self.device): b = len(env_idx) # set agent root pose - torso now centered at dummy root at (0,0,0) pose = Pose.create_from_pq( p=torch.tensor([0, 0, 1.3]).unsqueeze(0).repeat(b, 1) ) self.agent.robot.set_root_pose(pose) # set randomized qpos noise_scale = 1e-2 qpos_noise = ( torch.rand(b, self.agent.robot.dof[0]) * (2 * noise_scale) ) - noise_scale qvel_noise = ( torch.rand(b, self.agent.robot.dof[0]) * (2 * noise_scale) ) - noise_scale self.agent.robot.set_qpos(qpos_noise) self.agent.robot.set_qvel(qvel_noise)
# in standard (non-hard) version, we terminate early if the torso is in unacceptable range
[docs] def evaluate(self) -> dict: info = super().evaluate() torso_z = self.agent.robot.links_map["torso"].pose.p[:, -1] failure = torch.logical_or(torso_z < 0.7, torso_z > 2.0) info.update(fail=failure) return info
[docs] def move_x_rew(self, info, move_speed=10): com_vel_x = info["cmass_linvel"][:, 0] return rewards.tolerance( com_vel_x, lower=move_speed, upper=np.inf, margin=move_speed, value_at_margin=0, sigmoid="linear", ).view( -1 ) # (b,3) -> (b)
@register_env("MS-HumanoidStand-v1", max_episode_steps=1000)
[docs]class HumanoidStand(HumanoidEnvStandard): """ **Task Description:** Humanoid robot stands upright **Randomizations:** - Humanoid robot is randomly rotated [-pi, pi] radians about z axis. - Humanoid qpos and qvel have added noise from uniform distribution [-1e-2, 1e-2] **Fail Conditions:** - Humanoid robot torso link leaves z range [0.7, 1.0] """
[docs] agent: Union[Humanoid]
def __init__(self, *args, robot_uids="humanoid", **kwargs): super().__init__(*args, robot_uids=robot_uids, **kwargs)
[docs] def _get_obs_state_dict(self, info: dict): # make all obs completely egocentric, for z rotation invariance, since stand has z rot randomization root_pose_mat = self.agent.robot.links_map[ "dummy_root_0" ].pose.to_transformation_matrix()[:, :3, :3] lin_vels = [ link.get_linear_velocity() for link in self.active_links ] # (links, b, 3) ang_vels = [ link.get_angular_velocity() for link in self.active_links ] # (links, b, 3) non_ego_vels = torch.stack( [*lin_vels, *ang_vels, info["cmass_linvel"]], dim=1 ) # (b, len(lin_vels)+len(ang_vels)+1, 3) ego_vels = (non_ego_vels @ root_pose_mat).view( -1, (len(lin_vels) + len(ang_vels) + 1) * 3 ) return dict( agent=self._get_obs_agent(), # (b, 21*2) root joint not included in our qpos head_height=self.head_height, # (b,1) egocentric_vels=ego_vels, extremities=self.extremities(info), )
# in stand, we also randomize the agent rotation around z axis
[docs] def _initialize_episode(self, env_idx: torch.Tensor, options: dict): super()._initialize_episode(env_idx=env_idx, options=options) with torch.device(self.device): b = len(env_idx) # randomize z rotation for humanoid pose alphas = torch.rand(b) * 2 * torch.pi quats = torch.zeros(b, 4) quats[:, 0] = (alphas / 2).cos() quats[:, -1] = (alphas / 2).sin() pose = Pose.create_from_pq(p=[0, 0, 1.3], q=quats) self.agent.robot.set_root_pose(pose)
[docs] def compute_normalized_dense_reward( self, obs: Any, action: torch.Tensor, info: dict ): small_control = (4 + self.control_rew(action)) / 5 stand_rew = ( small_control * self.standing_rew() * self.upright_rew(info) * self.dont_move_rew(info) ) return stand_rew
@register_env("MS-HumanoidWalk-v1", max_episode_steps=1000)
[docs]class HumanoidWalk(HumanoidEnvStandard): """ **Task Description:** Humanoid moves in x direction at walking pace **Randomizations:** - Humanoid qpos and qvel have added noise from uniform distribution [-1e-2, 1e-2] **Fail Conditions:** - Humanoid robot torso link leaves z range [0.7, 1.0] """
[docs] agent: Union[Humanoid]
def __init__(self, *args, robot_uids="humanoid", **kwargs): super().__init__(*args, robot_uids=robot_uids, **kwargs)
[docs] def compute_normalized_dense_reward( self, obs: Any, action: torch.Tensor, info: dict ): small_control = (4 + self.control_rew(action)) / 5 walk_rew = ( small_control * self.move_x_rew(info, _WALK_SPEED) * self.upright_rew(info) * self.standing_rew() ) alive_rew = 1 return (alive_rew + walk_rew) / 2
@register_env("MS-HumanoidRun-v1", max_episode_steps=1000)
[docs]class HumanoidRun(HumanoidEnvStandard): """ **Task Description:** Humanoid moves in x direction at running pace **Randomizations:** - Humanoid qpos and qvel have added noise from uniform distribution [-1e-2, 1e-2] **Fail Conditions:** - Humanoid robot torso link leaves z range [0.7, 1.0] """
[docs] agent: Union[Humanoid]
def __init__(self, *args, robot_uids="humanoid", **kwargs): super().__init__(*args, robot_uids=robot_uids, **kwargs)
[docs] def compute_normalized_dense_reward( self, obs: Any, action: torch.Tensor, info: dict ): # reward function used by mjx for ppo humanoid run rew_scale = 0.1 run_x_rew = info["cmass_linvel"][:, 0] alive_rew = 5 return rew_scale * ( alive_rew + 1.25 * run_x_rew - 0.1 * action.pow(2).sum(dim=-1) )
# TODO (xhin): more sac testing of hard version of environments # class HumanoidEnvHard(HumanoidEnvBase): # agent: Union[Humanoid] # def __init__(self, *args, robot_uids="humanoid", **kwargs): # super().__init__(*args, robot_uids=robot_uids, **kwargs) # def _get_obs_state_dict(self, info: dict): # # our qpos model doesn't include the free joint, meaning qpos and qvel are 21 dims, not 27 # # global dpos/dt and root (torso) dquaterion/dt lost # # we replace with linear root linvel and root angularvel (equivalent info) # return dict( # agent=self._get_obs_agent(), # (b, 21*2) root joint not included in our qpos # head_height=self.head_height, # (b,1) # extremities=self.extremities(info), # (b, 12) # torso_vertical=self.torso_vertical_orientation(info), # (b, 3) # com_velocity=info["cmass_linvel"], # (b, 3) # root_vel=self.agent.robot.links_map[ # "dummy_root_0" # ].get_linear_velocity(), # free joint info, (b, 3) # root_quat_vel=self.agent.robot.links_map[ # "dummy_root_0" # ].get_angular_velocity(), # free joint info, (b, 3) # ) # def _initialize_episode(self, env_idx: torch.Tensor, options: dict): # self.camera_mount.set_pose( # Pose.create_from_pq(p=self.agent.robot.links_map["torso"].pose.p) # ) # with torch.device(self.device): # b = len(env_idx) # # set agent root pose - torso now centered at dummy root at (0,0,0) # pose = Pose.create_from_pq( # p=[0, 0, 1.5], q=randomization.random_quaternions(b) # ) # self.agent.robot.set_root_pose(pose) # # set randomized qpos # random_qpos = torch.rand(b, self.agent.robot.dof[0]) # q_lims = self.agent.robot.get_qlimits() # q_ranges = q_lims[..., 1] - q_lims[..., 0] # random_qpos *= q_ranges # random_qpos += q_lims[..., 0] # self.agent.reset(random_qpos) # @register_env("MS-HumanoidStandHard-v1", max_episode_steps=1000) # class HumanoidStandHard(HumanoidEnvHard): # agent: Union[Humanoid] # def __init__(self, *args, robot_uids="humanoid", **kwargs): # super().__init__(*args, robot_uids=robot_uids, **kwargs) # def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict): # small_control = (4 + self.control_rew(action)) / 5 # return ( # small_control # * self.standing_rew() # * self.upright_rew(info) # * self.dont_move_rew(info) # ) # def compute_normalized_dense_reward( # self, obs: Any, action: torch.Tensor, info: dict # ): # return self.compute_dense_reward(obs, action, info) # @register_env("MS-HumanoidWalkHard-v1", max_episode_steps=1000) # class HumanoidWalkHard(HumanoidEnvHard): # agent: Union[Humanoid] # def __init__(self, *args, robot_uids="humanoid", **kwargs): # super().__init__(*args, robot_uids=robot_uids, **kwargs) # def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict): # small_control = (4 + self.control_rew(action)) / 5 # return ( # small_control # * self.standing_rew() # * self.upright_rew(info) # * self.move_rew(info, _WALK_SPEED) # ) # def compute_normalized_dense_reward( # self, obs: Any, action: torch.Tensor, info: dict # ): # return self.compute_dense_reward(obs, action, info) # @register_env("MS-HumanoidRunHard-v1", max_episode_steps=1000) # class HumanoidRunHard(HumanoidEnvHard): # agent: Union[Humanoid] # def __init__(self, *args, robot_uids="humanoid", **kwargs): # super().__init__(*args, robot_uids=robot_uids, **kwargs) # def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict): # small_control = (4 + self.control_rew(action)) / 5 # return ( # small_control # * self.standing_rew() # * self.upright_rew(info) # * self.move_rew(info, _RUN_SPEED) # ) # def compute_normalized_dense_reward( # self, obs: Any, action: torch.Tensor, info: dict # ): # return self.compute_dense_reward(obs, action, info)