import os
from typing import Union
import cv2
import torch
from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import common, sapien_utils
from mani_skill.utils.structs.actor import Actor
from mani_skill.utils.structs.articulation import Articulation
from mani_skill.utils.structs.link import Link
from mani_skill.utils.structs.types import SimConfig
class BaseDigitalTwinEnv(BaseEnv):
    """Base Environment class for easily setting up evaluation digital twins for real2sim and sim2real

    This is based on [SIMPLER](https://simpler-env.github.io/) and currently has the following tricks for
    making accurate simulated environments of real world datasets

    Greenscreening: Add a greenscreened real image to the background to make the images more realistic and closer to the distribution
    of real world data. To use the functionality in your own custom task you can do the following:

    .. code-block:: python

        class MyTask(BaseDigitalTwinEnv):
            def __init__(self, **kwargs):
                self.rgb_overlay_paths = {"camera_name": "path/to/greenscreen/image.png"}
                super().__init__(**kwargs)

            def _load_scene(self, options: dict):
                # load your objects as usual e.g. a cube at self.cube

                # exclude the robot and cube from the greenscreen process
                self.remove_object_from_greenscreen(self.robot)
                self.remove_object_from_greenscreen(self.cube)

    Use `self.remove_object_from_greenscreen(object: Actor | Link | Articulation)` to exclude those objects from the greenscreen process.
    """

    rgb_overlay_paths: Union[dict[str, str], None] = None
    """dict mapping camera name to the file path of the greenscreening image"""
    _rgb_overlay_images: Union[dict[str, torch.Tensor], None] = dict()
    """dict mapping camera name to the overlay image. This class-level value is only a placeholder:
    `__init__` rebinds a per-instance dict (or None when `rgb_overlay_paths` is None), so it is never mutated."""
    rgb_overlay_mode: str = "background"
    """which RGB overlay mode to use during the greenscreen process. The default is 'background' which enables greenscreening like normal. The other option is 'debug' mode which
    will make the opacity of the original render and greenscreen overlay both 50%. The third option is "none" which will not perform any greenscreening."""
    _objects_to_remove_from_greenscreen: list[Union[Actor, Link]] = []
    """list of actors/links (articulations are stored as their links) that should be removed from the greenscreen process.
    This class-level value is only a template: `__init__` rebinds a per-instance copy."""
    _segmentation_ids_to_keep: Union[torch.Tensor, None] = None
    """torch tensor of segmentation ids that reference the objects that should not be greenscreened"""

    def __init__(self, **kwargs):
        """Load the greenscreen overlay images, then initialize the base environment.

        Subclasses are expected to set `self.rgb_overlay_paths` before calling
        `super().__init__(**kwargs)`; all keyword arguments are forwarded to `BaseEnv`.

        Raises:
            FileNotFoundError: if an overlay path does not exist.
            ValueError: if an overlay file exists but OpenCV cannot decode it.
        """
        # Rebind the greenscreen bookkeeping on the instance. The class-level containers
        # are shared by every environment, so appending to / filling them in place would
        # leak registered objects and camera entries from one environment into another.
        # This must happen before super().__init__ in case scene loading registers objects.
        self._objects_to_remove_from_greenscreen = list(
            self._objects_to_remove_from_greenscreen
        )

        # Load the "greenscreen" image, which is used to overlay the background portions of simulation observation
        if self.rgb_overlay_paths is None:
            self._rgb_overlay_images = None
        else:
            overlay_images = {}
            for camera_name, path in self.rgb_overlay_paths.items():
                if not os.path.exists(path):
                    raise FileNotFoundError(f"rgb_overlay_path {path} is not found.")
                bgr_image = cv2.imread(path)
                if bgr_image is None:
                    # cv2.imread signals an unreadable/undecodable file by returning None
                    # instead of raising; fail here with a message that names the path.
                    raise ValueError(
                        f"rgb_overlay_path {path} could not be decoded as an image."
                    )
                overlay_images[camera_name] = cv2.cvtColor(
                    bgr_image, cv2.COLOR_BGR2RGB
                )  # (H, W, 3); uint8
            self._rgb_overlay_images = overlay_images

        super().__init__(**kwargs)

    @property
    def _default_sim_config(self):
        """Return a `SimConfig` constructed with its default arguments."""
        return SimConfig()

    @property
    def _default_human_render_camera_configs(self):
        """Return the 512x512 `render_camera` configuration.

        The pose comes from `sapien_utils.look_at([0.6, 0.7, 0.6], [0.0, 0.0, 0.35])`
        (presumably eye position and look-at target -- confirm against `sapien_utils`).
        """
        pose = sapien_utils.look_at([0.6, 0.7, 0.6], [0.0, 0.0, 0.35])
        return CameraConfig(
            "render_camera", pose=pose, width=512, height=512, fov=1, near=0.01, far=100
        )

    def _load_scene(self, options: dict):
        """
        Load assets for the digital twin scene.

        The base implementation does nothing. Subclasses override it to build their scene and
        call `remove_object_from_greenscreen` for everything that should stay rendered
        (see the class docstring for an example).
        """

    def remove_object_from_greenscreen(self, object: Union[Articulation, Actor, Link]):
        """remove an actor/articulation/link from the greenscreen process

        An articulation is registered as each of its links (via `get_links()`); actors and
        links are registered directly. Values of any other type are silently ignored.
        """
        if isinstance(object, Articulation):
            self._objects_to_remove_from_greenscreen.extend(object.get_links())
        elif isinstance(object, (Actor, Link)):
            self._objects_to_remove_from_greenscreen.append(object)

    def _green_sceen_rgb(self, rgb, segmentation, overlay_img):
        """returns green screened RGB data given a batch of RGB and segmentation images and one overlay image

        Args:
            rgb: rendered RGB images.
            segmentation: segmentation images; channel 0 of the last axis is compared
                against `self._segmentation_ids_to_keep`.
            overlay_img: overlay image; must broadcast against `rgb`
                (assumes it already matches the camera resolution -- TODO confirm where it is resized).

        Returns:
            The composited image converted to `torch.uint8`.
        """
        actor_seg = segmentation[..., 0]
        # NOTE(review): `_segmentation_ids_to_keep` is not assigned anywhere in the code visible
        # here; it is presumably populated elsewhere before the first observation is produced.
        if self._segmentation_ids_to_keep.device != actor_seg.device:
            self._segmentation_ids_to_keep = self._segmentation_ids_to_keep.to(
                actor_seg.device
            )

        if "debug" in self.rgb_overlay_mode:
            # debug: blend the render and the overlay at 50% opacity each
            composited = rgb * 0.5 + overlay_img * 0.5
        else:
            if self.rgb_overlay_mode == "background":
                # only overlay the background and keep the foregrounds (robot and target objects) rendered in simulation
                overlay_mask = ~torch.isin(actor_seg, self._segmentation_ids_to_keep)
            else:
                # any other mode replaces every pixel with the overlay
                overlay_mask = torch.ones_like(actor_seg, dtype=torch.bool)
            overlay_mask = overlay_mask[..., None]
            # perform overlay on the RGB observation image
            composited = rgb * (~overlay_mask) + overlay_img * overlay_mask
        return composited.to(torch.uint8)

    def _get_obs_sensor_data(self, apply_texture_transforms: bool = True):
        """Return the sensor observations with the greenscreen overlay applied to RGB images.

        The observations from the parent class are returned untouched when
        `rgb_overlay_mode` is "none", when the observation mode lacks RGB or segmentation,
        or when `rgb_overlay_paths` is None. Otherwise `obs[camera_name]["rgb"]` is replaced
        by the composited image for every camera that has an overlay image.
        """
        obs = super()._get_obs_sensor_data(apply_texture_transforms)

        # "greenscreen" process
        if self.rgb_overlay_mode == "none":
            return obs
        if not (
            self.obs_mode_struct.visual.rgb
            and self.obs_mode_struct.visual.segmentation
            and self.rgb_overlay_paths is not None
        ):
            return obs

        for camera_name in self._rgb_overlay_images:
            camera_obs = obs[camera_name]
            # obtain overlay mask based on segmentation info
            assert (
                "segmentation" in camera_obs.keys()
            ), "Image overlay requires segment info in the observation!"
            rgb = camera_obs["rgb"]

            overlay_img = self._rgb_overlay_images[camera_name]
            if not isinstance(overlay_img, torch.Tensor):
                # __init__ stores the arrays returned by cv2; compositing needs a tensor on
                # the same device as the observation. Cache the converted tensor.
                overlay_img = torch.as_tensor(overlay_img, device=rgb.device)
                self._rgb_overlay_images[camera_name] = overlay_img
            elif overlay_img.device != rgb.device:
                overlay_img = overlay_img.to(rgb.device)
                self._rgb_overlay_images[camera_name] = overlay_img

            camera_obs["rgb"] = self._green_sceen_rgb(
                rgb,
                camera_obs["segmentation"],
                overlay_img,
            )
        return obs