Source code for mani_skill.agents.controllers.pd_joint_pos

from dataclasses import dataclass, field
from typing import Sequence, TypedDict, Union

import numpy as np
import torch
from gymnasium import spaces
from typing_extensions import NotRequired

from mani_skill.utils.logging_utils import logger
from mani_skill.utils.structs.types import DriveMode

from .base_controller import BaseController, ControllerConfig



[docs]
class PDJointPosController(BaseController):

[docs]
    config: "PDJointPosControllerConfig"


[docs]
    _start_qpos: torch.Tensor


[docs]
    _target_qpos: torch.Tensor


[docs]
    sets_target_qpos = True


[docs]
    sets_target_qvel = False


    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._start_qpos = torch.zeros(
            (self.scene.num_envs, self.active_joint_indices.shape[0]),
            device=self.device,
        )
        self._target_qpos = torch.zeros(
            (self.scene.num_envs, self.active_joint_indices.shape[0]),
            device=self.device,
        )


[docs]
    def _get_joint_limits(self):
        qlimits = (
            self.articulation.get_qlimits()[0, self.active_joint_indices].cpu().numpy()
        )
        # Override if specified
        if self.config.lower is not None:
            qlimits[:, 0] = self.config.lower
        if self.config.upper is not None:
            qlimits[:, 1] = self.config.upper
        return qlimits



[docs]
    def _initialize_action_space(self):
        joint_limits = self._get_joint_limits()
        low, high = joint_limits[:, 0], joint_limits[:, 1]
        self.single_action_space = spaces.Box(low, high, dtype=np.float32)



[docs]
    def set_drive_property(self):
        n = len(self.joints)
        stiffness = np.broadcast_to(self.config.stiffness, n)
        damping = np.broadcast_to(self.config.damping, n)
        force_limit = np.broadcast_to(self.config.force_limit, n)
        friction = np.broadcast_to(self.config.friction, n)

        for i, joint in enumerate(self.joints):
            drive_mode = self.config.drive_mode
            if not isinstance(drive_mode, str):
                drive_mode = drive_mode[i]
            joint.set_drive_properties(
                stiffness[i], damping[i], force_limit=force_limit[i], mode=drive_mode
            )
            joint.set_friction(friction[i])



[docs]
    def reset(self):
        super().reset()
        self._step = 0  # counter of simulation steps after action is set
        self._start_qpos[self.scene._reset_mask] = self.qpos[
            self.scene._reset_mask
        ].clone()

        self._target_qpos[self.scene._reset_mask] = self.qpos[
            self.scene._reset_mask
        ].clone()



[docs]
    def set_drive_targets(self, targets):
        self.articulation.set_joint_drive_targets(
            targets, self.joints, self.active_joint_indices
        )



[docs]
    def set_action(self, action: torch.Tensor):
        action = self._preprocess_action(action)
        self._step = 0
        self._start_qpos = self.qpos
        if self.config.use_delta:
            if self.config.use_target:
                self._target_qpos = self._target_qpos + action
            else:
                self._target_qpos = self._start_qpos + action
        else:
            # Compatible with mimic controllers. Need to clone here otherwise cannot do in-place replacements in the reset function
            self._target_qpos = torch.broadcast_to(
                action, self._start_qpos.shape
            ).clone()
        if self.config.interpolate:
            self._step_size = (self._target_qpos - self._start_qpos) / self._sim_steps
        else:
            self.set_drive_targets(self._target_qpos)



[docs]
    def before_simulation_step(self):
        self._step += 1

        # Compute the next target via a linear interpolation
        if self.config.interpolate:
            targets = self._start_qpos + self._step_size * self._step
            self.set_drive_targets(targets)



[docs]
    def get_state(self) -> dict:
        if self.config.use_target:
            return {"target_qpos": self._target_qpos}
        return {}



[docs]
    def set_state(self, state: dict):
        if self.config.use_target:
            self._target_qpos = state["target_qpos"]




@dataclass

[docs]
class PDJointPosControllerConfig(ControllerConfig):

[docs]
    lower: Union[None, float, Sequence[float], np.ndarray]

    """The lower bound for the joint position. If none, the lower bound defaults to the lower joint limit."""

[docs]
    upper: Union[None, float, Sequence[float], np.ndarray]

    """The upper bound for the joint position. If none, the upper bound defaults to the upper joint limit."""

[docs]
    stiffness: Union[float, Sequence[float], np.ndarray]


[docs]
    damping: Union[float, Sequence[float], np.ndarray]


[docs]
    force_limit: Union[float, Sequence[float], np.ndarray] = 1e10


[docs]
    friction: Union[float, Sequence[float], np.ndarray] = 0.0


[docs]
    use_delta: bool = False


[docs]
    use_target: bool = False


[docs]
    interpolate: bool = False


[docs]
    normalize_action: bool = True


[docs]
    drive_mode: Union[Sequence[DriveMode], DriveMode] = "force"


[docs]
    controller_cls = PDJointPosController





[docs]
class PDJointPosMimicController(PDJointPosController):

[docs]
    config: "PDJointPosMimicControllerConfig"



[docs]
    def _initialize_joints(self):
        super()._initialize_joints()
        # do some sanity checks and setup the mimic controller

        self.mimic_joint_indices = []
        self.mimic_control_joint_indices = []
        if len(self.config.mimic) == 0:
            if len(self.config.joint_names) == 2:
                logger.warning(
                    f"Mimic targets dictionary is missing for controller config for {self.articulation.name}. Assuming the first joint is the control joint and the second joint is the mimic joint"
                )
                self.config.mimic = {
                    self.config.joint_names[0]: {"joint": self.config.joint_names[1]}
                }
            else:
                raise ValueError(
                    "Mimic targets dictionary is missing. Please provide a mimic targets dictionary to setup mimic controllers with more than 2 joints"
                )

        self._multiplier = torch.ones(
            len(self.config.mimic), device=self.device
        )  # this is parallel to self.mimic_joint_indices
        self._offset = torch.zeros(len(self.config.mimic), device=self.device)

        for i, (mimic_joint_name, mimic_data) in enumerate(self.config.mimic.items()):
            control_joint_name = mimic_data["joint"]
            multiplier = mimic_data.get("multiplier", 1.0)
            offset = mimic_data.get("offset", 0.0)
            assert (
                self.articulation.joints_map[mimic_joint_name].active_index is not None
            ), f"Mimic joint {mimic_joint_name} is not active, cannot be used as a mimic joint in a mimic controller"
            assert (
                self.articulation.joints_map[control_joint_name].active_index
                is not None
            ), f"Control joint {control_joint_name} is not active, cannot be used as a control joint in a mimic controller"

            # NOTE (stao): we are assuming all the articulations are the exact same structure. At the moment I see very little reason to try and support training/evaluating on different embodiments
            # simultaneously in one process
            self.mimic_joint_indices.append(
                torch.argwhere(
                    self.active_joint_indices
                    == self.articulation.joints_map[mimic_joint_name].active_index[0]
                )
            )
            self.mimic_control_joint_indices.append(
                torch.argwhere(
                    self.active_joint_indices
                    == self.articulation.joints_map[control_joint_name].active_index[0]
                )
            )

            self._multiplier[i] = multiplier
            self._offset[i] = offset

        self.mimic_joint_indices = torch.tensor(
            self.mimic_joint_indices, dtype=torch.int32, device=self.device
        )
        self.mimic_control_joint_indices = torch.tensor(
            self.mimic_control_joint_indices, dtype=torch.int32, device=self.device
        )
        """list of control joint indices corresponding to mimic joint indices"""
        self.control_joint_indices = torch.unique(self.mimic_control_joint_indices).to(
            torch.int32
        )
        """list of all directly controlled joint indices"""

        self.effective_dof = len(self.control_joint_indices)



[docs]
    def _get_joint_limits(self):
        joint_limits = super()._get_joint_limits()
        joint_limits = joint_limits[self.control_joint_indices.cpu().numpy()]
        if len(joint_limits.shape) == 1:
            joint_limits = joint_limits[None, :]
        return joint_limits



[docs]
    def set_action(self, action: torch.Tensor):
        action = self._preprocess_action(action)
        self._step = 0
        self._start_qpos = self.qpos
        if self.config.use_delta:
            if self.config.use_target:
                self._target_qpos[:, self.control_joint_indices] += action
            else:
                self._target_qpos[:, self.control_joint_indices] = (
                    self._start_qpos[:, self.control_joint_indices] + action
                )
        else:
            self._target_qpos[:, self.control_joint_indices] = action
        self._target_qpos[:, self.mimic_joint_indices] = (
            self._target_qpos[:, self.mimic_control_joint_indices]
            * self._multiplier[None, :]
            + self._offset[None, :]
        )
        if self.config.interpolate:
            self._step_size = (self._target_qpos - self._start_qpos) / self._sim_steps
        else:
            self.set_drive_targets(self._target_qpos)



[docs]
    def __repr__(self):
        data = {k: v["joint"] for k, v in self.config.mimic.items()}
        main_str = "{\n"
        for k, v in data.items():
            main_str += f"    {k}: {v},\n"
        main_str += "}"
        data = main_str
        return f"PDJointPosMimicController(dof={self.single_action_space.shape[0]}, active_joints={len(self.joints)}, mimic_to_control_joint_map={main_str})"





[docs]
class MimicConfig(TypedDict):

[docs]
    joint: str


[docs]
    multiplier: NotRequired[float]


[docs]
    offset: NotRequired[float]




@dataclass

[docs]
class PDJointPosMimicControllerConfig(PDJointPosControllerConfig):
    """
    PD Joint Position mimic controller configuration. This kind of controller is used to emuluate a mimic joint. For some simulation backends mimic joints are not
    well simulated and/or are hard to tune and so a mimic controller can be used for better stability as well as alignment with a real robot.

    The `joint_names` field is expected to be a list of all the joints, whether they are the joints to be controlled or the joints that are controlled via mimicing.

    `mimic` is a dictionary that maps the mimic joint name to a dictionary of the format dict(joint: str, multiplier: float, offset: float). `joint` is the controlling joint name and is required. All given joints in
    `joint_names` must be in the `mimic` dictionary as either a key or a value. The ones that are keys are referred to as the mimic joints and the ones that are values
    are referred to as the control joints. Mimic joints are the ones directly controlled by the agent/user and control joints are implicitly controlled by following the mimic joints via the following equation:

    q_mimic = q_controlling * multiplier + offset

    By default, the multiplier is 1.0 and the offset is 0.0.
    """


[docs]
    controller_cls = PDJointPosMimicController


[docs]
    mimic: dict[str, MimicConfig] = field(default_factory=dict)

    """the mimic targets. Maps the actual mimic joint name to a dictionary of the format dict(joint: str, multiplier: float, offset: float)"""