Source code for mani_skill.agents.controllers.pd_joint_pos

from dataclasses import dataclass, field
from typing import Sequence, Union

import numpy as np
import torch
from gymnasium import spaces

from mani_skill.utils import common
from mani_skill.utils.logging_utils import logger
from mani_skill.utils.structs.types import Array, DriveMode

from .base_controller import BaseController, ControllerConfig


[docs]class PDJointPosController(BaseController):
[docs] config: "PDJointPosControllerConfig"
[docs] _start_qpos = None
[docs] _target_qpos = None
[docs] sets_target_qpos = True
[docs] sets_target_qvel = False
[docs] def _get_joint_limits(self): qlimits = ( self.articulation.get_qlimits()[0, self.active_joint_indices].cpu().numpy() ) # Override if specified if self.config.lower is not None: qlimits[:, 0] = self.config.lower if self.config.upper is not None: qlimits[:, 1] = self.config.upper return qlimits
[docs] def _initialize_action_space(self): joint_limits = self._get_joint_limits() low, high = joint_limits[:, 0], joint_limits[:, 1] self.single_action_space = spaces.Box(low, high, dtype=np.float32)
[docs] def set_drive_property(self): n = len(self.joints) stiffness = np.broadcast_to(self.config.stiffness, n) damping = np.broadcast_to(self.config.damping, n) force_limit = np.broadcast_to(self.config.force_limit, n) friction = np.broadcast_to(self.config.friction, n) for i, joint in enumerate(self.joints): drive_mode = self.config.drive_mode if not isinstance(drive_mode, str): drive_mode = drive_mode[i] joint.set_drive_properties( stiffness[i], damping[i], force_limit=force_limit[i], mode=drive_mode ) joint.set_friction(friction[i])
[docs] def reset(self): super().reset() self._step = 0 # counter of simulation steps after action is set if self._start_qpos is None: self._start_qpos = self.qpos.clone() else: self._start_qpos[self.scene._reset_mask] = self.qpos[ self.scene._reset_mask ].clone() if self._target_qpos is None: self._target_qpos = self.qpos.clone() else: self._target_qpos[self.scene._reset_mask] = self.qpos[ self.scene._reset_mask ].clone()
[docs] def set_drive_targets(self, targets): self.articulation.set_joint_drive_targets( targets, self.joints, self.active_joint_indices )
[docs] def set_action(self, action: Array): action = self._preprocess_action(action) self._step = 0 self._start_qpos = self.qpos if self.config.use_delta: if self.config.use_target: self._target_qpos = self._target_qpos + action else: self._target_qpos = self._start_qpos + action else: # Compatible with mimic controllers. Need to clone here otherwise cannot do in-place replacements in the reset function self._target_qpos = torch.broadcast_to( action, self._start_qpos.shape ).clone() if self.config.interpolate: self._step_size = (self._target_qpos - self._start_qpos) / self._sim_steps else: self.set_drive_targets(self._target_qpos)
[docs] def before_simulation_step(self): self._step += 1 # Compute the next target via a linear interpolation if self.config.interpolate: targets = self._start_qpos + self._step_size * self._step self.set_drive_targets(targets)
[docs] def get_state(self) -> dict: if self.config.use_target: return {"target_qpos": self._target_qpos} return {}
[docs] def set_state(self, state: dict): if self.config.use_target: self._target_qpos = state["target_qpos"]
@dataclass
[docs]class PDJointPosControllerConfig(ControllerConfig):
[docs] lower: Union[None, float, Sequence[float]]
[docs] upper: Union[None, float, Sequence[float]]
[docs] stiffness: Union[float, Sequence[float]]
[docs] damping: Union[float, Sequence[float]]
[docs] force_limit: Union[float, Sequence[float]] = 1e10
[docs] friction: Union[float, Sequence[float]] = 0.0
[docs] use_delta: bool = False
[docs] use_target: bool = False
[docs] interpolate: bool = False
[docs] normalize_action: bool = True
[docs] drive_mode: Union[Sequence[DriveMode], DriveMode] = "force"
[docs] controller_cls = PDJointPosController
[docs]class PDJointPosMimicController(PDJointPosController):
[docs] config: "PDJointPosMimicControllerConfig"
[docs] def _initialize_joints(self): super()._initialize_joints() # do some sanity checks and setup the mimic controller self.mimic_joint_indices = [] self.mimic_control_joint_indices = [] if len(self.config.mimic) == 0: if len(self.config.joint_names) == 2: logger.warning( f"Mimic targets dictionary is missing for controller config for {self.articulation.name}. Assuming the first joint is the control joint and the second joint is the mimic joint" ) self.config.mimic = { self.config.joint_names[0]: {"joint": self.config.joint_names[1]} } else: raise ValueError( "Mimic targets dictionary is missing. Please provide a mimic targets dictionary to setup mimic controllers with more than 2 joints" ) self._multiplier = torch.ones( len(self.config.mimic), device=self.device ) # this is parallel to self.mimic_joint_indices self._offset = torch.zeros(len(self.config.mimic), device=self.device) for i, (mimic_joint_name, mimic_data) in enumerate(self.config.mimic.items()): control_joint_name = mimic_data["joint"] multiplier = mimic_data.get("multiplier", 1.0) offset = mimic_data.get("offset", 0.0) assert ( self.articulation.joints_map[mimic_joint_name].active_index is not None ), f"Mimic joint {mimic_joint_name} is not active, cannot be used as a mimic joint in a mimic controller" assert ( self.articulation.joints_map[control_joint_name].active_index is not None ), f"Control joint {control_joint_name} is not active, cannot be used as a control joint in a mimic controller" # NOTE (stao): we are assuming all the articulations are the exact same structure. At the moment I see very little reason to try and support training/evaluating on different embodiments # simultaneously in one process self.mimic_joint_indices.append( torch.argwhere( self.active_joint_indices == self.articulation.joints_map[mimic_joint_name].active_index[0] ) ) self.mimic_control_joint_indices.append( torch.argwhere( self.active_joint_indices == self.articulation.joints_map[control_joint_name].active_index[0] ) ) self._multiplier[i] = multiplier self._offset[i] = offset self.mimic_joint_indices = torch.tensor( self.mimic_joint_indices, dtype=torch.int32, device=self.device ) self.mimic_control_joint_indices = torch.tensor( self.mimic_control_joint_indices, dtype=torch.int32, device=self.device ) """list of control joint indices corresponding to mimic joint indices""" self.control_joint_indices = torch.unique(self.mimic_control_joint_indices).to( torch.int32 ) """list of all directly controlled joint indices""" self.effective_dof = len(self.control_joint_indices)
[docs] def _get_joint_limits(self): joint_limits = super()._get_joint_limits() joint_limits = joint_limits[self.control_joint_indices.cpu().numpy()] if len(joint_limits.shape) == 1: joint_limits = joint_limits[None, :] return joint_limits
[docs] def set_action(self, action: Array): action = self._preprocess_action(action) self._step = 0 self._start_qpos = self.qpos if self.config.use_delta: if self.config.use_target: self._target_qpos[:, self.control_joint_indices] += action else: self._target_qpos[:, self.control_joint_indices] = ( self._start_qpos[:, self.control_joint_indices] + action ) else: self._target_qpos[:, self.control_joint_indices] = action self._target_qpos[:, self.mimic_joint_indices] = ( self._target_qpos[:, self.mimic_control_joint_indices] * self._multiplier[None, :] + self._offset[None, :] ) if self.config.interpolate: self._step_size = (self._target_qpos - self._start_qpos) / self._sim_steps else: self.set_drive_targets(self._target_qpos)
[docs] def __repr__(self): data = {k: v["joint"] for k, v in self.config.mimic.items()} main_str = "{\n" for k, v in data.items(): main_str += f" {k}: {v},\n" main_str += "}" data = main_str return f"PDJointPosMimicController(dof={self.single_action_space.shape[0]}, active_joints={len(self.joints)}, mimic_to_control_joint_map={main_str})"
@dataclass
[docs]class PDJointPosMimicControllerConfig(PDJointPosControllerConfig): """ PD Joint Position mimic controller configuration. This kind of controller is used to emuluate a mimic joint. For some simulation backends mimic joints are not well simulated and/or are hard to tune and so a mimic controller can be used for better stability as well as alignment with a real robot. The `joint_names` field is expected to be a list of all the joints, whether they are the joints to be controlled or the joints that are controlled via mimicing. `mimic` is a dictionary that maps the mimic joint name to a dictionary of the format dict(joint: str, multiplier: float, offset: float). `joint` is the controlling joint name and is required. All given joints in `joint_names` must be in the `mimic` dictionary as either a key or a value. The ones that are keys are referred to as the mimic joints and the ones that are values are referred to as the control joints. Mimic joints are the ones directly controlled by the agent/user and control joints are implicitly controlled by following the mimic joints via the following equation: q_mimic = q_controlling * multiplier + offset By default, the multiplier is 1.0 and the offset is 0.0. """
[docs] controller_cls = PDJointPosMimicController
[docs] mimic: dict[str, dict[str, float]] = field(default_factory=dict)
"""the mimic targets. Maps the actual mimic joint name to a dictionary of the format dict(joint: str, multiplier: float, offset: float)"""