"""Adapted from https://github.com/google-deepmind/dm_control/blob/main/dm_control/suite/cartpole.py"""
import os
from typing import Any, Optional, Union
import numpy as np
import sapien
import torch
from transforms3d.euler import euler2quat
from mani_skill.agents.base_agent import BaseAgent
from mani_skill.agents.controllers import *
from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.envs.utils import randomization, rewards
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import common, sapien_utils
from mani_skill.utils.registration import register_env
from mani_skill.utils.structs.pose import Pose
from mani_skill.utils.structs.types import (
Array,
GPUMemoryConfig,
SceneConfig,
SimConfig,
)
[docs]MJCF_FILE = f"{os.path.join(os.path.dirname(__file__), 'assets/cartpole.xml')}"
[docs]class CartPoleRobot(BaseAgent):
[docs] disable_self_collisions = True
@property
[docs] def _controller_configs(self):
# NOTE it is impossible to copy joint properties from original xml files, have to tune manually until
# it looks approximately correct
pd_joint_delta_pos = PDJointPosControllerConfig(
["slider"],
-1,
1,
damping=200,
stiffness=2000,
use_delta=True,
)
rest = PassiveControllerConfig(["hinge_1"], damping=0, friction=0)
return dict(
pd_joint_delta_pos=dict(
slider=pd_joint_delta_pos, rest=rest, balance_passive_force=False
)
)
[docs] def _load_articulation(
self, initial_pose: Optional[Union[sapien.Pose, Pose]] = None
):
"""
Load the robot articulation
"""
loader = self.scene.create_mjcf_loader()
asset_path = str(self.mjcf_path)
loader.name = self.uid
# only need the robot
builder = loader.parse(asset_path)["articulation_builders"][0]
builder.initial_pose = initial_pose
self.robot = builder.build()
assert self.robot is not None, f"Fail to load URDF/MJCF from {asset_path}"
# Cache robot link ids
self.robot_link_names = [link.name for link in self.robot.get_links()]
# @register_env("MS-CartPole-v1", max_episode_steps=500)
# class CartPoleEnv(BaseEnv):
# SUPPORTED_REWARD_MODES = ["sparse", "none"]
# SUPPORTED_ROBOTS = [CartPoleRobot]
# agent: Union[CartPoleRobot]
# CART_RANGE = [-0.25, 0.25]
# ANGLE_COSINE_RANGE = [0.995, 1]
# def __init__(self, *args, robot_uids=CartPoleRobot, **kwargs):
# super().__init__(*args, robot_uids=robot_uids, **kwargs)
[docs]class CartpoleEnv(BaseEnv):
[docs] agent: Union[CartPoleRobot]
def __init__(self, *args, robot_uids=CartPoleRobot, **kwargs):
super().__init__(*args, robot_uids=robot_uids, **kwargs)
@property
[docs] def _default_sim_config(self):
return SimConfig(
spacing=20,
scene_config=SceneConfig(
solver_position_iterations=4, solver_velocity_iterations=0
),
)
@property
[docs] def _default_sensor_configs(self):
pose = sapien_utils.look_at(eye=[0, -4, 1], target=[0, 0, 1])
return [CameraConfig("base_camera", pose, 128, 128, np.pi / 2, 0.01, 100)]
@property
[docs] def _default_human_render_camera_configs(self):
pose = sapien_utils.look_at(eye=[0, -4, 1], target=[0, 0, 1])
return CameraConfig("render_camera", pose, 512, 512, 1, 0.01, 100)
[docs] def _load_scene(self, options: dict):
loader = self.scene.create_mjcf_loader()
actor_builders = loader.parse(MJCF_FILE)["actor_builders"]
for a in actor_builders:
a.initial_pose = sapien.Pose()
a.build(a.name)
# background visual wall
self.wall = self.scene.create_actor_builder()
self.wall.add_box_visual(
half_size=(1e-3, 20, 10),
material=sapien.render.RenderMaterial(
base_color=np.array([0.3, 0.3, 0.3, 1])
),
)
self.wall.initial_pose = sapien.Pose(p=[0, 1, 1], q=euler2quat(0, 0, np.pi / 2))
self.wall.build_static(name="wall")
[docs] def evaluate(self):
return dict()
@property
[docs] def pole_angle_cosine(self):
return torch.cos(self.agent.robot.joints_map["hinge_1"].qpos)
[docs] def compute_dense_reward(self, obs: Any, action: Array, info: dict):
cart_pos = self.agent.robot.links_map["cart"].pose.p[
:, 0
] # (B, ), we only care about x position
centered = rewards.tolerance(cart_pos, margin=2)
centered = (1 + centered) / 2 # (B, )
small_control = rewards.tolerance(
action, margin=1, value_at_margin=0, sigmoid="quadratic"
)[:, 0]
small_control = (4 + small_control) / 5
angular_vel = self.agent.robot.get_qvel()[:, 1]
small_velocity = rewards.tolerance(angular_vel, margin=5)
small_velocity = (1 + small_velocity) / 2 # (B, )
upright = (self.pole_angle_cosine + 1) / 2 # (B, )
# upright is 1 when the pole is upright, 0 when the pole is upside down
# small_control is 1 when the action is small, 0.8 when the action is large
# small_velocity is 1 when the angular velocity is small, 0.5 when the angular velocity is large
# centered is 1 when the cart is centered, 0 when the cart is at the edge of the screen
reward = upright * centered * small_control * small_velocity
return reward
[docs] def compute_normalized_dense_reward(self, obs: Any, action: Array, info: dict):
# this should be equal to compute_dense_reward / max possible reward
max_reward = 1.0
return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
@register_env("MS-CartpoleBalance-v1", max_episode_steps=1000)
[docs]class CartpoleBalanceEnv(CartpoleEnv):
"""
**Task Description:**
Use the Cartpole robot to balance a pole on a cart.
**Randomizations:**
- Pole direction is randomized around the vertical axis. the range is [-0.05, 0.05] radians.
**Fail Conditions:**
- Pole is lower than the horizontal plane
"""
[docs] _sample_video_link = "https://github.com/haosulab/ManiSkill/raw/main/figures/environment_demos/MS-CartpoleBalance-v1_rt.mp4"
def __init__(self, *args, **kwargs):
super().__init__(
*args,
**kwargs,
)
[docs] def _initialize_episode(self, env_idx: torch.Tensor, options: dict):
with torch.device(self.device):
b = len(env_idx)
qpos = torch.zeros((b, 2))
qpos[:, 0] = randomization.uniform(-0.1, 0.1, size=(b,))
qpos[:, 1] = randomization.uniform(-0.034, 0.034, size=(b,))
qvel = torch.randn(size=(b, 2)) * 0.01
self.agent.robot.set_qpos(qpos)
self.agent.robot.set_qvel(qvel)
[docs] def evaluate(self):
return dict(fail=self.pole_angle_cosine < 0)
@register_env("MS-CartpoleSwingUp-v1", max_episode_steps=1000)
[docs]class CartpoleSwingUpEnv(CartpoleEnv):
"""
**Task Description:**
Use the Cartpole robot to swing up a pole on a cart.
**Randomizations:**
- Pole direction is randomized around the whole circle. the range is [-pi, pi] radians.
**Success Conditions:**
- No specific success conditions. The task is considered successful if the pole is upright for the whole episode.
"""
[docs] SUPPORTED_REWARD_MODES = ("normalized_dense", "dense", "none")
def __init__(self, *args, **kwargs):
super().__init__(
*args,
**kwargs,
)
[docs] def _initialize_episode(self, env_idx: torch.Tensor, options: dict):
with torch.device(self.device):
b = len(env_idx)
qpos = torch.zeros((b, 2))
qpos[:, 0] = torch.randn((b,)) * 0.01
qpos[:, 1] = torch.randn((b,)) * 0.01 + torch.pi
qvel = torch.randn(size=(b, 2)) * 0.01
self.agent.robot.set_qpos(qpos)
self.agent.robot.set_qvel(qvel)