diff --git a/gym/core.py b/gym/core.py index 22538e23e84..c3d06cd8801 100644 --- a/gym/core.py +++ b/gym/core.py @@ -1,6 +1,16 @@ """Core API for Environment, Wrapper, ActionWrapper, RewardWrapper and ObservationWrapper.""" import sys -from typing import Generic, Optional, SupportsFloat, Tuple, TypeVar, Union +from typing import ( + Any, + Dict, + Generic, + List, + Optional, + SupportsFloat, + Tuple, + TypeVar, + Union, +) from gym import spaces from gym.logger import deprecation, warn @@ -14,6 +24,44 @@ ObsType = TypeVar("ObsType") ActType = TypeVar("ActType") +RenderFrame = TypeVar("RenderFrame") + + +class _EnvDecorator(type): # TODO: remove with gym 1.0 + """Metaclass used for adding deprecation warning to the mode kwarg in the render method.""" + + def __new__(cls, name, bases, attr): + if "render" in attr.keys(): + attr["render"] = _EnvDecorator._deprecate_mode(attr["render"]) + + return super().__new__(cls, name, bases, attr) + + @staticmethod + def _deprecate_mode(render_func): # type: ignore + render_return = Optional[Union[RenderFrame, List[RenderFrame]]] + + def render( + self: object, *args: Tuple[Any], **kwargs: Dict[str, Any] + ) -> render_return: + if "mode" in kwargs.keys(): + deprecation( + "The argument mode in render method is deprecated; " + "use render_mode during environment initialization instead.\n" + "See here for more information: https://www.gymlibrary.ml/content/api/" + ) + elif self.spec is not None and "render_mode" not in self.spec.kwargs.keys(): # type: ignore + deprecation( + "You are calling render method, " + "but you didn't specified the argument render_mode at environment initialization. 
" + "To maintain backward compatibility, the environment will render in human mode.\n" + "If you want to render in human mode, initialize the environment in this way: " + "gym.make('EnvName', render_mode='human') and don't call the render method.\n" + "See here for more information: https://www.gymlibrary.ml/content/api/" + ) + + return render_func(self, *args, **kwargs) + + return render class Env(Generic[ObsType, ActType]): @@ -43,8 +91,11 @@ class Env(Generic[ObsType, ActType]): Note: a default reward range set to :math:`(-\infty,+\infty)` already exists. Set it if you want a narrower range. """ + __metaclass__ = _EnvDecorator + # Set this in SOME subclasses metadata = {"render_modes": []} + render_mode = None # define render_mode if your environment supports rendering reward_range = (-float("inf"), float("inf")) spec = None @@ -130,42 +181,34 @@ def reset( if seed is not None: self._np_random, seed = seeding.np_random(seed) - def render(self, mode="human"): - """Renders the environment. + # TODO: remove kwarg mode with gym 1.0 + def render(self, mode="human") -> Optional[Union[RenderFrame, List[RenderFrame]]]: + """Compute the render frames as specified by render_mode attribute during initialization of the environment. - A set of supported modes varies per environment. (And some + The set of supported modes varies per environment. (And some third-party environments may not support rendering at all.) - By convention, if mode is: + By convention, if render_mode is: + + - None (default): no render is computed. + - human: render return None. + The environment is continuously rendered in the current display or terminal. Usually for human consumption. + - single_rgb_array: return a single frame representing the current state of the environment. + A frame is a numpy.ndarray with shape (x, y, 3) representing RGB values for an x-by-y pixel image. + - rgb_array: return a list of frames representing the states of the environment since the last reset. 
+ Each frame is a numpy.ndarray with shape (x, y, 3), as with single_rgb_array. + - ansi: Return a list of strings (str) or StringIO.StringIO containing a + terminal-style text representation for each time step. + The text can include newlines and ANSI escape sequences (e.g. for colors). - - human: render to the current display or terminal and - return nothing. Usually for human consumption. - - rgb_array: Return a numpy.ndarray with shape (x, y, 3), - representing RGB values for an x-by-y pixel image, suitable - for turning into a video. - - ansi: Return a string (str) or StringIO.StringIO containing a - terminal-style text representation. The text can include newlines - and ANSI escape sequences (e.g. for colors). + Note: + Rendering computations is performed internally even if you don't call render(). + To avoid this, you can set render_mode = None and, if the environment supports it, + call render() specifying the argument 'mode'. Note: Make sure that your class's metadata 'render_modes' key includes the list of supported modes. It's recommended to call super() in implementations to use the functionality of this method. - - Example: - >>> import numpy as np - >>> class MyEnv(Env): - ... metadata = {'render_modes': ['human', 'rgb_array']} - ... - ... def render(self, mode='human'): - ... if mode == 'rgb_array': - ... return np.array(...) # return RGB frame suitable for video - ... elif mode == 'human': - ... ... # pop up a window and render - ... else: - ... 
super().render(mode=mode) # just raise an exception - - Args: - mode: the mode to render with, valid modes are `env.metadata["render_modes"]` """ raise NotImplementedError diff --git a/gym/envs/box2d/bipedal_walker.py b/gym/envs/box2d/bipedal_walker.py index 7e35a53fd8e..48c3a16fba3 100644 --- a/gym/envs/box2d/bipedal_walker.py +++ b/gym/envs/box2d/bipedal_walker.py @@ -9,6 +9,7 @@ from gym import error, spaces from gym.error import DependencyNotInstalled from gym.utils import EzPickle +from gym.utils.renderer import Renderer try: import Box2D @@ -159,12 +160,13 @@ class BipedalWalker(gym.Env, EzPickle): """ - metadata = {"render_modes": ["human", "rgb_array"], "render_fps": FPS} + metadata = { + "render_modes": ["human", "rgb_array", "single_rgb_array"], + "render_fps": FPS, + } - def __init__(self, hardcore: bool = False): + def __init__(self, render_mode: Optional[str] = None, hardcore: bool = False): EzPickle.__init__(self) - self.screen = None - self.clock = None self.isopen = True self.world = Box2D.b2World() @@ -252,6 +254,12 @@ def __init__(self, hardcore: bool = False): # ] # state += [l.fraction for l in self.lidar] + assert render_mode is None or render_mode in self.metadata["render_modes"] + self.render_mode = render_mode + self.renderer = Renderer(self.render_mode, self._render) + self.screen = None + self.clock = None + def _destroy(self): if not self.terrain: return @@ -500,6 +508,7 @@ def ReportFixture(self, fixture, point, normal, fraction): return fraction self.lidar = [LidarCallback() for _ in range(10)] + self.renderer.reset() if not return_info: return self.step(np.array([0, 0, 0, 0]))[0] else: @@ -589,9 +598,18 @@ def step(self, action: np.ndarray): done = True if pos[0] > (TERRAIN_LENGTH - TERRAIN_GRASS) * TERRAIN_STEP: done = True + + self.renderer.render_step() return np.array(state, dtype=np.float32), reward, done, {} def render(self, mode: str = "human"): + if self.render_mode is not None: + return self.renderer.get_renders() + else: + 
return self._render(mode) + + def _render(self, mode: str = "human"): + assert mode in self.metadata["render_modes"] try: import pygame from pygame import gfxdraw @@ -600,7 +618,7 @@ def render(self, mode: str = "human"): "pygame is not installed, run `pip install gym[box2d]`" ) - if self.screen is None: + if self.screen is None and mode == "human": pygame.init() pygame.display.init() self.screen = pygame.display.set_mode((VIEWPORT_W, VIEWPORT_H)) @@ -653,18 +671,19 @@ def render(self, mode: str = "human"): self.lidar_render = (self.lidar_render + 1) % 100 i = self.lidar_render if i < 2 * len(self.lidar): - l = ( + single_lidar = ( self.lidar[i] if i < len(self.lidar) else self.lidar[len(self.lidar) - i - 1] ) - pygame.draw.line( - self.surf, - color=(255, 0, 0), - start_pos=(l.p1[0] * SCALE, l.p1[1] * SCALE), - end_pos=(l.p2[0] * SCALE, l.p2[1] * SCALE), - width=1, - ) + if hasattr(single_lidar, "p1") and hasattr(single_lidar, "p2"): + pygame.draw.line( + self.surf, + color=(255, 0, 0), + start_pos=(single_lidar.p1[0] * SCALE, single_lidar.p1[1] * SCALE), + end_pos=(single_lidar.p2[0] * SCALE, single_lidar.p2[1] * SCALE), + width=1, + ) for obj in self.drawlist: for f in obj.fixtures: @@ -717,18 +736,16 @@ def render(self, mode: str = "human"): ) self.surf = pygame.transform.flip(self.surf, False, True) - self.screen.blit(self.surf, (-self.scroll * SCALE, 0)) + if mode == "human": + self.screen.blit(self.surf, (-self.scroll * SCALE, 0)) pygame.event.pump() self.clock.tick(self.metadata["render_fps"]) pygame.display.flip() - - if mode == "rgb_array": + elif mode in {"rgb_array", "single_rgb_array"}: return np.transpose( - np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2) + np.array(pygame.surfarray.pixels3d(self.surf)), axes=(1, 0, 2) ) - else: - return self.isopen def close(self): if self.screen is not None: @@ -829,6 +846,5 @@ def __init__(self): a[3] = knee_todo[1] a = np.clip(0.5 * a, -1.0, 1.0) - env.render() if done: break diff --git 
a/gym/envs/box2d/car_racing.py b/gym/envs/box2d/car_racing.py index 08391baf97b..9e1a3851042 100644 --- a/gym/envs/box2d/car_racing.py +++ b/gym/envs/box2d/car_racing.py @@ -10,6 +10,7 @@ from gym.envs.box2d.car_dynamics import Car from gym.error import DependencyNotInstalled, InvalidAction from gym.utils import EzPickle +from gym.utils.renderer import Renderer try: import Box2D @@ -151,12 +152,19 @@ class CarRacing(gym.Env, EzPickle): """ metadata = { - "render_modes": ["human", "rgb_array", "state_pixels"], + "render_modes": [ + "human", + "rgb_array", + "state_pixels", + "single_rgb_array", + "single_state_pixels", + ], "render_fps": FPS, } def __init__( self, + render_mode: Optional[str] = None, verbose: bool = False, lap_complete_percent: float = 0.95, domain_randomize: bool = False, @@ -170,6 +178,7 @@ def __init__( self.contactListener_keepref = FrictionDetector(self, lap_complete_percent) self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref) self.screen = None + self.surf = None self.clock = None self.isopen = True self.invisible_state_window = None @@ -199,6 +208,10 @@ def __init__( low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8 ) + assert render_mode is None or render_mode in self.metadata["render_modes"] + self.render_mode = render_mode + self.renderer = Renderer(self.render_mode, self._render) + def _destroy(self): if not self.road: return @@ -441,6 +454,7 @@ def reset( ) self.car = Car(self.world, *self.track[0][1:4]) + self.renderer.reset() if not return_info: return self.step(None)[0] else: @@ -466,7 +480,7 @@ def step(self, action: Union[np.ndarray, int]): self.world.Step(1.0 / FPS, 6 * 30, 2 * 30) self.t += 1.0 / FPS - self.state = self.render("state_pixels") + self.state = self._render("single_state_pixels") step_reward = 0 done = False @@ -484,9 +498,17 @@ def step(self, action: Union[np.ndarray, int]): done = True step_reward = -100 + self.renderer.render_step() return self.state, step_reward, done, {} 
def render(self, mode: str = "human"): + if self.render_mode is not None: + return self.renderer.get_renders() + else: + return self._render(mode) + + def _render(self, mode: str = "human"): + assert mode in self.metadata["render_modes"] try: import pygame except ImportError: @@ -496,7 +518,6 @@ def render(self, mode: str = "human"): pygame.font.init() - assert mode in ["human", "state_pixels", "rgb_array"] if self.screen is None and mode == "human": pygame.init() pygame.display.init() @@ -519,7 +540,13 @@ def render(self, mode: str = "human"): trans = (WINDOW_W / 2 + trans[0], WINDOW_H / 4 + trans[1]) self._render_road(zoom, trans, angle) - self.car.draw(self.surf, zoom, trans, angle, mode != "state_pixels") + self.car.draw( + self.surf, + zoom, + trans, + angle, + mode not in ["state_pixels", "single_state_pixels"], + ) self.surf = pygame.transform.flip(self.surf, False, True) @@ -539,9 +566,9 @@ def render(self, mode: str = "human"): self.screen.blit(self.surf, (0, 0)) pygame.display.flip() - if mode == "rgb_array": + if mode in {"rgb_array", "single_rgb_array"}: return self._create_image_array(self.surf, (VIDEO_W, VIDEO_H)) - elif mode == "state_pixels": + elif mode in {"state_pixels", "single_state_pixels"}: return self._create_image_array(self.surf, (STATE_W, STATE_H)) else: return self.isopen diff --git a/gym/envs/box2d/lunar_lander.py b/gym/envs/box2d/lunar_lander.py index 7069b2d1f9e..68710ddbedd 100644 --- a/gym/envs/box2d/lunar_lander.py +++ b/gym/envs/box2d/lunar_lander.py @@ -10,6 +10,7 @@ from gym import error, spaces from gym.error import DependencyNotInstalled from gym.utils import EzPickle, colorize +from gym.utils.renderer import Renderer try: import Box2D @@ -171,10 +172,14 @@ class LunarLander(gym.Env, EzPickle): Created by Oleg Klimov """ - metadata = {"render_modes": ["human", "rgb_array"], "render_fps": FPS} + metadata = { + "render_modes": ["human", "rgb_array", "single_rgb_array"], + "render_fps": FPS, + } def __init__( self, + render_mode: 
Optional[str] = None, continuous: bool = False, gravity: float = -10.0, enable_wind: bool = False, @@ -267,6 +272,10 @@ def __init__( # Nop, fire left engine, main engine, right engine self.action_space = spaces.Discrete(4) + assert render_mode is None or render_mode in self.metadata["render_modes"] + self.render_mode = render_mode + self.renderer = Renderer(self.render_mode, self._render) + def _destroy(self): if not self.moon: return @@ -390,6 +399,7 @@ def reset( self.drawlist = [self.lander] + self.legs + self.renderer.reset() if not return_info: return self.step(np.array([0, 0]) if self.continuous else 0)[0] else: @@ -567,9 +577,18 @@ def step(self, action): if not self.lander.awake: done = True reward = +100 + + self.renderer.render_step() return np.array(state, dtype=np.float32), reward, done, {} def render(self, mode="human"): + if self.render_mode is not None: + return self.renderer.get_renders() + else: + return self._render(mode) + + def _render(self, mode="human"): + assert mode in self.metadata["render_modes"] try: import pygame from pygame import gfxdraw @@ -578,14 +597,14 @@ def render(self, mode="human"): "pygame is not installed, run `pip install gym[box2d]`" ) - if self.screen is None: + if self.screen is None and mode == "human": pygame.init() pygame.display.init() self.screen = pygame.display.set_mode((VIEWPORT_W, VIEWPORT_H)) if self.clock is None: self.clock = pygame.time.Clock() - self.surf = pygame.Surface(self.screen.get_size()) + self.surf = pygame.Surface((VIEWPORT_W, VIEWPORT_H)) pygame.transform.scale(self.surf, (SCALE, SCALE)) pygame.draw.rect(self.surf, (255, 255, 255), self.surf.get_rect()) @@ -664,19 +683,16 @@ def render(self, mode="human"): ) self.surf = pygame.transform.flip(self.surf, False, True) - self.screen.blit(self.surf, (0, 0)) if mode == "human": + self.screen.blit(self.surf, (0, 0)) pygame.event.pump() self.clock.tick(self.metadata["render_fps"]) pygame.display.flip() - - if mode == "rgb_array": + elif mode in 
{"rgb_array", "single_rgb_array"}: return np.transpose( np.array(pygame.surfarray.pixels3d(self.surf)), axes=(1, 0, 2) ) - else: - return self.isopen def close(self): if self.screen is not None: diff --git a/gym/envs/classic_control/acrobot.py b/gym/envs/classic_control/acrobot.py index fd26471160d..1cd4b5d83b2 100644 --- a/gym/envs/classic_control/acrobot.py +++ b/gym/envs/classic_control/acrobot.py @@ -20,6 +20,7 @@ # SOURCE: # https://github.com/rlpy/rlpy/blob/master/rlpy/Domains/Acrobot.py +from gym.utils.renderer import Renderer class AcrobotEnv(core.Env): @@ -134,7 +135,10 @@ class AcrobotEnv(core.Env): - Sutton, R. S., Barto, A. G. (2018 ). Reinforcement Learning: An Introduction. The MIT Press. """ - metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 15} + metadata = { + "render_modes": ["human", "rgb_array", "single_rgb_array"], + "render_fps": 15, + } dt = 0.2 @@ -161,7 +165,10 @@ class AcrobotEnv(core.Env): domain_fig = None actions_num = 3 - def __init__(self): + def __init__(self, render_mode: Optional[str] = None): + assert render_mode is None or render_mode in self.metadata["render_modes"] + self.render_mode = render_mode + self.renderer = Renderer(self.render_mode, self._render) self.screen = None self.clock = None self.isopen = True @@ -184,6 +191,9 @@ def reset( self.state = self.np_random.uniform(low=-0.1, high=0.1, size=(4,)).astype( np.float32 ) + + self.renderer.reset() + self.renderer.render_step() if not return_info: return self._get_ob() else: @@ -213,7 +223,9 @@ def step(self, a): self.state = ns terminal = self._terminal() reward = -1.0 if not terminal else 0.0 - return (self._get_ob(), reward, terminal, {}) + + self.renderer.render_step() + return self._get_ob(), reward, terminal, {} def _get_ob(self): s = self.state @@ -267,9 +279,16 @@ def _dsdt(self, s_augmented): a + d2 / d1 * phi1 - m2 * l1 * lc2 * dtheta1**2 * sin(theta2) - phi2 ) / (m2 * lc2**2 + I2 - d2**2 / d1) ddtheta1 = -(d2 * ddtheta2 + phi1) / d1 - return 
(dtheta1, dtheta2, ddtheta1, ddtheta2, 0.0) + return dtheta1, dtheta2, ddtheta1, ddtheta2, 0.0 def render(self, mode="human"): + if self.render_mode is not None: + return self.renderer.get_renders() + else: + return self._render(mode) + + def _render(self, mode="human"): + assert mode in self.metadata["render_modes"] try: import pygame from pygame import gfxdraw @@ -280,13 +299,18 @@ def render(self, mode="human"): if self.screen is None: pygame.init() - pygame.display.init() - self.screen = pygame.display.set_mode((self.SCREEN_DIM, self.SCREEN_DIM)) + if mode == "human": + pygame.display.init() + self.screen = pygame.display.set_mode( + (self.SCREEN_DIM, self.SCREEN_DIM) + ) + else: # mode in {"rgb_array", "single_rgb_array"} + self.screen = pygame.Surface((self.SCREEN_DIM, self.SCREEN_DIM)) if self.clock is None: self.clock = pygame.time.Clock() - self.surf = pygame.Surface((self.SCREEN_DIM, self.SCREEN_DIM)) - self.surf.fill((255, 255, 255)) + surf = pygame.Surface((self.SCREEN_DIM, self.SCREEN_DIM)) + surf.fill((255, 255, 255)) s = self.state bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2 # 2.2 for default @@ -311,7 +335,7 @@ def render(self, mode="human"): link_lengths = [self.LINK_LENGTH_1 * scale, self.LINK_LENGTH_2 * scale] pygame.draw.line( - self.surf, + surf, start_pos=(-2.2 * scale + offset, 1 * scale + offset), end_pos=(2.2 * scale + offset, 1 * scale + offset), color=(0, 0, 0), @@ -327,35 +351,33 @@ def render(self, mode="human"): coord = pygame.math.Vector2(coord).rotate_rad(th) coord = (coord[0] + x, coord[1] + y) transformed_coords.append(coord) - gfxdraw.aapolygon(self.surf, transformed_coords, (0, 204, 204)) - gfxdraw.filled_polygon(self.surf, transformed_coords, (0, 204, 204)) + gfxdraw.aapolygon(surf, transformed_coords, (0, 204, 204)) + gfxdraw.filled_polygon(surf, transformed_coords, (0, 204, 204)) - gfxdraw.aacircle(self.surf, int(x), int(y), int(0.1 * scale), (204, 204, 0)) - gfxdraw.filled_circle( - self.surf, int(x), int(y), 
int(0.1 * scale), (204, 204, 0) - ) + gfxdraw.aacircle(surf, int(x), int(y), int(0.1 * scale), (204, 204, 0)) + gfxdraw.filled_circle(surf, int(x), int(y), int(0.1 * scale), (204, 204, 0)) + + surf = pygame.transform.flip(surf, False, True) + self.screen.blit(surf, (0, 0)) - self.surf = pygame.transform.flip(self.surf, False, True) - self.screen.blit(self.surf, (0, 0)) if mode == "human": pygame.event.pump() self.clock.tick(self.metadata["render_fps"]) pygame.display.flip() - if mode == "rgb_array": + elif mode in {"rgb_array", "single_rgb_array"}: return np.transpose( np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2) ) - else: - return self.isopen - def close(self): - if self.screen is not None: - import pygame - pygame.display.quit() - pygame.quit() - self.isopen = False +def close(self): + if self.screen is not None: + import pygame + + pygame.display.quit() + pygame.quit() + self.isopen = False def wrap(x, m, M): diff --git a/gym/envs/classic_control/cartpole.py b/gym/envs/classic_control/cartpole.py index 426df835ddd..2bbdd014225 100644 --- a/gym/envs/classic_control/cartpole.py +++ b/gym/envs/classic_control/cartpole.py @@ -11,6 +11,7 @@ import gym from gym import logger, spaces from gym.error import DependencyNotInstalled +from gym.utils.renderer import Renderer class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]): @@ -79,9 +80,12 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]): No additional arguments are currently supported. 
""" - metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50} + metadata = { + "render_modes": ["human", "rgb_array", "single_rgb_array"], + "render_fps": 50, + } - def __init__(self): + def __init__(self, render_mode: Optional[str] = None): self.gravity = 9.8 self.masscart = 1.0 self.masspole = 0.1 @@ -111,6 +115,12 @@ def __init__(self): self.action_space = spaces.Discrete(2) self.observation_space = spaces.Box(-high, high, dtype=np.float32) + assert render_mode is None or render_mode in self.metadata["render_modes"] + self.render_mode = render_mode + self.renderer = Renderer(self.render_mode, self._render) + + self.screen_width = 600 + self.screen_height = 400 self.screen = None self.clock = None self.isopen = True @@ -174,6 +184,7 @@ def step(self, action): self.steps_beyond_done += 1 reward = 0.0 + self.renderer.render_step() return np.array(self.state, dtype=np.float32), reward, done, {} def reset( @@ -186,12 +197,21 @@ def reset( super().reset(seed=seed) self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,)) self.steps_beyond_done = None + self.renderer.reset() + self.renderer.render_step() if not return_info: return np.array(self.state, dtype=np.float32) else: return np.array(self.state, dtype=np.float32), {} def render(self, mode="human"): + if self.render_mode is not None: + return self.renderer.get_renders() + else: + return self._render(mode) + + def _render(self, mode="human"): + assert mode in self.metadata["render_modes"] try: import pygame from pygame import gfxdraw @@ -200,11 +220,20 @@ def render(self, mode="human"): "pygame is not installed, run `pip install gym[classic_control]`" ) - screen_width = 600 - screen_height = 400 + if self.screen is None: + pygame.init() + if mode == "human": + pygame.display.init() + self.screen = pygame.display.set_mode( + (self.screen_width, self.screen_height) + ) + else: # mode in {"rgb_array", "single_rgb_array"} + self.screen = pygame.Surface((self.screen_width, self.screen_height)) 
+ if self.clock is None: + self.clock = pygame.time.Clock() world_width = self.x_threshold * 2 - scale = screen_width / world_width + scale = self.screen_width / world_width polewidth = 10.0 polelen = scale * (2 * self.length) cartwidth = 50.0 @@ -215,19 +244,12 @@ def render(self, mode="human"): x = self.state - if self.screen is None: - pygame.init() - pygame.display.init() - self.screen = pygame.display.set_mode((screen_width, screen_height)) - if self.clock is None: - self.clock = pygame.time.Clock() - - self.surf = pygame.Surface((screen_width, screen_height)) + self.surf = pygame.Surface((self.screen_width, self.screen_height)) self.surf.fill((255, 255, 255)) l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2 axleoffset = cartheight / 4.0 - cartx = x[0] * scale + screen_width / 2.0 # MIDDLE OF CART + cartx = x[0] * scale + self.screen_width / 2.0 # MIDDLE OF CART carty = 100 # TOP OF CART cart_coords = [(l, b), (l, t), (r, t), (r, b)] cart_coords = [(c[0] + cartx, c[1] + carty) for c in cart_coords] @@ -264,7 +286,7 @@ def render(self, mode="human"): (129, 132, 203), ) - gfxdraw.hline(self.surf, 0, screen_width, carty, (0, 0, 0)) + gfxdraw.hline(self.surf, 0, self.screen_width, carty, (0, 0, 0)) self.surf = pygame.transform.flip(self.surf, False, True) self.screen.blit(self.surf, (0, 0)) @@ -273,12 +295,10 @@ def render(self, mode="human"): self.clock.tick(self.metadata["render_fps"]) pygame.display.flip() - if mode == "rgb_array": + elif mode in {"rgb_array", "single_rgb_array"}: return np.transpose( np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2) ) - else: - return self.isopen def close(self): if self.screen is not None: diff --git a/gym/envs/classic_control/continuous_mountain_car.py b/gym/envs/classic_control/continuous_mountain_car.py index 453ca372df8..f2e24a693a0 100644 --- a/gym/envs/classic_control/continuous_mountain_car.py +++ b/gym/envs/classic_control/continuous_mountain_car.py @@ -21,6 +21,7 @@ import 
gym from gym import spaces from gym.error import DependencyNotInstalled +from gym.utils.renderer import Renderer class Continuous_MountainCarEnv(gym.Env): @@ -99,9 +100,12 @@ class Continuous_MountainCarEnv(gym.Env): * v0: Initial versions release (1.0.0) """ - metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30} + metadata = { + "render_modes": ["human", "rgb_array", "single_rgb_array"], + "render_fps": 30, + } - def __init__(self, goal_velocity=0): + def __init__(self, render_mode: Optional[str] = None, goal_velocity=0): self.min_action = -1.0 self.max_action = 1.0 self.min_position = -1.2 @@ -120,6 +124,12 @@ def __init__(self, goal_velocity=0): [self.max_position, self.max_speed], dtype=np.float32 ) + assert render_mode is None or render_mode in self.metadata["render_modes"] + self.render_mode = render_mode + self.renderer = Renderer(self.render_mode, self._render) + + self.screen_width = 600 + self.screen_height = 400 self.screen = None self.clock = None self.isopen = True @@ -159,6 +169,8 @@ def step(self, action: np.ndarray): reward -= math.pow(action[0], 2) * 0.1 self.state = np.array([position, velocity], dtype=np.float32) + + self.renderer.render_step() return self.state, reward, done, {} def reset( @@ -170,6 +182,8 @@ def reset( ): super().reset(seed=seed) self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0]) + self.renderer.reset() + self.renderer.render_step() if not return_info: return np.array(self.state, dtype=np.float32) else: @@ -179,6 +193,14 @@ def _height(self, xs): return np.sin(3 * xs) * 0.45 + 0.55 def render(self, mode="human"): + if self.render_mode is not None: + return self.renderer.get_renders() + else: + return self._render(mode) + + def _render(self, mode="human"): + assert mode in self.metadata["render_modes"] + try: import pygame from pygame import gfxdraw @@ -187,21 +209,24 @@ def render(self, mode="human"): "pygame is not installed, run `pip install gym[classic_control]`" ) - screen_width = 600 - 
screen_height = 400 - - world_width = self.max_position - self.min_position - scale = screen_width / world_width - carwidth = 40 - carheight = 20 if self.screen is None: pygame.init() - pygame.display.init() - self.screen = pygame.display.set_mode((screen_width, screen_height)) + if mode == "human": + pygame.display.init() + self.screen = pygame.display.set_mode( + (self.screen_width, self.screen_height) + ) + else: # mode in {"rgb_array", "single_rgb_array"} + self.screen = pygame.Surface((self.screen_width, self.screen_height)) if self.clock is None: self.clock = pygame.time.Clock() - self.surf = pygame.Surface((screen_width, screen_height)) + world_width = self.max_position - self.min_position + scale = self.screen_width / world_width + carwidth = 40 + carheight = 20 + + self.surf = pygame.Surface((self.screen_width, self.screen_height)) self.surf.fill((255, 255, 255)) pos = self.state[0] @@ -265,12 +290,10 @@ def render(self, mode="human"): self.clock.tick(self.metadata["render_fps"]) pygame.display.flip() - if mode == "rgb_array": + elif mode in {"rgb_array", "single_rgb_array"}: return np.transpose( np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2) ) - else: - return self.isopen def close(self): if self.screen is not None: diff --git a/gym/envs/classic_control/mountain_car.py b/gym/envs/classic_control/mountain_car.py index deec9c865fb..61ea23e4278 100644 --- a/gym/envs/classic_control/mountain_car.py +++ b/gym/envs/classic_control/mountain_car.py @@ -10,6 +10,7 @@ import gym from gym import spaces from gym.error import DependencyNotInstalled +from gym.utils.renderer import Renderer class MountainCarEnv(gym.Env): @@ -94,9 +95,12 @@ class MountainCarEnv(gym.Env): * v0: Initial versions release (1.0.0) """ - metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30} + metadata = { + "render_modes": ["human", "rgb_array", "single_rgb_array"], + "render_fps": 30, + } - def __init__(self, goal_velocity=0): + def __init__(self, 
render_mode: Optional[str] = None, goal_velocity=0): self.min_position = -1.2 self.max_position = 0.6 self.max_speed = 0.07 @@ -109,6 +113,12 @@ def __init__(self, goal_velocity=0): self.low = np.array([self.min_position, -self.max_speed], dtype=np.float32) self.high = np.array([self.max_position, self.max_speed], dtype=np.float32) + assert render_mode is None or render_mode in self.metadata["render_modes"] + self.render_mode = render_mode + self.renderer = Renderer(self.render_mode, self._render) + + self.screen_width = 600 + self.screen_height = 400 self.screen = None self.clock = None self.isopen = True @@ -133,6 +143,8 @@ def step(self, action: int): reward = -1.0 self.state = (position, velocity) + + self.renderer.render_step() return np.array(self.state, dtype=np.float32), reward, done, {} def reset( @@ -144,6 +156,8 @@ def reset( ): super().reset(seed=seed) self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0]) + self.renderer.reset() + self.renderer.render_step() if not return_info: return np.array(self.state, dtype=np.float32) else: @@ -153,6 +167,13 @@ def _height(self, xs): return np.sin(3 * xs) * 0.45 + 0.55 def render(self, mode="human"): + if self.render_mode is not None: + return self.renderer.get_renders() + else: + return self._render(mode) + + def _render(self, mode="human"): + assert mode in self.metadata["render_modes"] try: import pygame from pygame import gfxdraw @@ -161,21 +182,24 @@ def render(self, mode="human"): "pygame is not installed, run `pip install gym[classic_control]`" ) - screen_width = 600 - screen_height = 400 - - world_width = self.max_position - self.min_position - scale = screen_width / world_width - carwidth = 40 - carheight = 20 if self.screen is None: pygame.init() - pygame.display.init() - self.screen = pygame.display.set_mode((screen_width, screen_height)) + if mode == "human": + pygame.display.init() + self.screen = pygame.display.set_mode( + (self.screen_width, self.screen_height) + ) + else: # mode in 
{"rgb_array", "single_rgb_array"} + self.screen = pygame.Surface((self.screen_width, self.screen_height)) if self.clock is None: self.clock = pygame.time.Clock() - self.surf = pygame.Surface((screen_width, screen_height)) + world_width = self.max_position - self.min_position + scale = self.screen_width / world_width + carwidth = 40 + carheight = 20 + + self.surf = pygame.Surface((self.screen_width, self.screen_height)) self.surf.fill((255, 255, 255)) pos = self.state[0] @@ -239,12 +263,10 @@ def render(self, mode="human"): self.clock.tick(self.metadata["render_fps"]) pygame.display.flip() - if mode == "rgb_array": + elif mode in {"rgb_array", "single_rgb_array"}: return np.transpose( np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2) ) - else: - return self.isopen def get_keys_to_action(self): # Control with left and right arrow keys. diff --git a/gym/envs/classic_control/pendulum.py b/gym/envs/classic_control/pendulum.py index 20ed31f0540..828ad8bbb87 100644 --- a/gym/envs/classic_control/pendulum.py +++ b/gym/envs/classic_control/pendulum.py @@ -8,6 +8,7 @@ import gym from gym import spaces from gym.error import DependencyNotInstalled +from gym.utils.renderer import Renderer class PendulumEnv(gym.Env): @@ -83,21 +84,28 @@ class PendulumEnv(gym.Env): """ - metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30} + metadata = { + "render_modes": ["human", "rgb_array", "single_rgb_array"], + "render_fps": 30, + } - def __init__(self, g=10.0): + def __init__(self, render_mode: Optional[str] = None, g=10.0): self.max_speed = 8 self.max_torque = 2.0 self.dt = 0.05 self.g = g self.m = 1.0 self.l = 1.0 + + assert render_mode is None or render_mode in self.metadata["render_modes"] + self.render_mode = render_mode + self.renderer = Renderer(self.render_mode, self._render) + + self.screen_dim = 500 self.screen = None self.clock = None self.isopen = True - self.screen_dim = 500 - high = np.array([1.0, 1.0, self.max_speed], dtype=np.float32) # This 
will throw a warning in tests/envs/test_envs in utils/env_checker.py as the space is not symmetric # or normalised as max_torque == 2 by default. Ignoring the issue here as the default settings are too old @@ -124,6 +132,7 @@ def step(self, u): newth = th + newthdot * dt self.state = np.array([newth, newthdot]) + self.renderer.render_step() return self._get_obs(), -costs, False, {} def reset( @@ -137,6 +146,9 @@ def reset( high = np.array([np.pi, 1]) self.state = self.np_random.uniform(low=-high, high=high) self.last_u = None + + self.renderer.reset() + self.renderer.render_step() if not return_info: return self._get_obs() else: @@ -147,6 +159,13 @@ def _get_obs(self): return np.array([np.cos(theta), np.sin(theta), thetadot], dtype=np.float32) def render(self, mode="human"): + if self.render_mode is not None: + return self.renderer.get_renders() + else: + return self._render(mode) + + def _render(self, mode="human"): + assert mode in self.metadata["render_modes"] try: import pygame from pygame import gfxdraw @@ -157,8 +176,13 @@ def render(self, mode="human"): if self.screen is None: pygame.init() - pygame.display.init() - self.screen = pygame.display.set_mode((self.screen_dim, self.screen_dim)) + if mode == "human": + pygame.display.init() + self.screen = pygame.display.set_mode( + (self.screen_dim, self.screen_dim) + ) + else: # mode in {"rgb_array", "single_rgb_array"} + self.screen = pygame.Surface((self.screen_dim, self.screen_dim)) if self.clock is None: self.clock = pygame.time.Clock() @@ -200,7 +224,8 @@ def render(self, mode="human"): img = pygame.image.load(fname) if self.last_u is not None: scale_img = pygame.transform.smoothscale( - img, (scale * np.abs(self.last_u) / 2, scale * np.abs(self.last_u) / 2) + img, + (scale * np.abs(self.last_u) / 2, scale * np.abs(self.last_u) / 2), ) is_flip = bool(self.last_u > 0) scale_img = pygame.transform.flip(scale_img, is_flip, True) @@ -223,12 +248,10 @@ def render(self, mode="human"): 
self.clock.tick(self.metadata["render_fps"]) pygame.display.flip() - if mode == "rgb_array": + else: # mode == "rgb_array": return np.transpose( np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2) ) - else: - return self.isopen def close(self): if self.screen is not None: diff --git a/gym/envs/mujoco/ant.py b/gym/envs/mujoco/ant.py index 70728dee5ea..8aa144312da 100644 --- a/gym/envs/mujoco/ant.py +++ b/gym/envs/mujoco/ant.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -5,14 +7,19 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle): - def __init__(self): - mujoco_env.MujocoEnv.__init__(self, "ant.xml", 5, mujoco_bindings="mujoco_py") + def __init__(self, render_mode: Optional[str] = None): + mujoco_env.MujocoEnv.__init__( + self, "ant.xml", 5, render_mode=render_mode, mujoco_bindings="mujoco_py" + ) utils.EzPickle.__init__(self) def step(self, a): xposbefore = self.get_body_com("torso")[0] self.do_simulation(a, self.frame_skip) xposafter = self.get_body_com("torso")[0] + + self.renderer.render_step() + forward_reward = (xposafter - xposbefore) / self.dt ctrl_cost = 0.5 * np.square(a).sum() contact_cost = ( diff --git a/gym/envs/mujoco/ant_v3.py b/gym/envs/mujoco/ant_v3.py index 4260fd7d2f0..63307ff043a 100644 --- a/gym/envs/mujoco/ant_v3.py +++ b/gym/envs/mujoco/ant_v3.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -11,6 +13,7 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__( self, + render_mode: Optional[str] = None, xml_file="ant.xml", ctrl_cost_weight=0.5, contact_cost_weight=5e-4, @@ -94,6 +97,8 @@ def step(self, action): rewards = forward_reward + healthy_reward costs = ctrl_cost + contact_cost + self.renderer.render_step() + reward = rewards - costs done = self.done observation = self._get_obs() diff --git a/gym/envs/mujoco/ant_v4.py b/gym/envs/mujoco/ant_v4.py index a23ec717a7c..167fc94fd5d 100644 --- a/gym/envs/mujoco/ant_v4.py +++ 
b/gym/envs/mujoco/ant_v4.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -164,6 +166,7 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__( self, + render_mode: Optional[str] = None, xml_file="ant.xml", ctrl_cost_weight=0.5, use_contact_forces=False, @@ -194,7 +197,7 @@ def __init__( exclude_current_positions_from_observation ) - mujoco_env.MujocoEnv.__init__(self, xml_file, 5) + mujoco_env.MujocoEnv.__init__(self, xml_file, 5, render_mode=render_mode) @property def healthy_reward(self): @@ -268,6 +271,7 @@ def step(self, action): reward = rewards - costs + self.renderer.render_step() return observation, reward, done, info def _get_obs(self): diff --git a/gym/envs/mujoco/half_cheetah.py b/gym/envs/mujoco/half_cheetah.py index 2c32de0b0a9..18087d1db80 100644 --- a/gym/envs/mujoco/half_cheetah.py +++ b/gym/envs/mujoco/half_cheetah.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -5,9 +7,13 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): - def __init__(self): + def __init__(self, render_mode: Optional[str] = None): mujoco_env.MujocoEnv.__init__( - self, "half_cheetah.xml", 5, mujoco_bindings="mujoco_py" + self, + "half_cheetah.xml", + 5, + render_mode=render_mode, + mujoco_bindings="mujoco_py", ) utils.EzPickle.__init__(self) @@ -15,6 +21,9 @@ def step(self, action): xposbefore = self.sim.data.qpos[0] self.do_simulation(action, self.frame_skip) xposafter = self.sim.data.qpos[0] + + self.renderer.render_step() + ob = self._get_obs() reward_ctrl = -0.1 * np.square(action).sum() reward_run = (xposafter - xposbefore) / self.dt diff --git a/gym/envs/mujoco/half_cheetah_v3.py b/gym/envs/mujoco/half_cheetah_v3.py index c6da69f2cc7..afa1abb13c3 100644 --- a/gym/envs/mujoco/half_cheetah_v3.py +++ b/gym/envs/mujoco/half_cheetah_v3.py @@ -1,4 +1,7 @@ __credits__ = ["Rushiv Arora"] + +from typing import Optional + import numpy as np from gym import utils @@ 
-12,6 +15,7 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__( self, + render_mode: Optional[str] = None, xml_file="half_cheetah.xml", forward_reward_weight=1.0, ctrl_cost_weight=0.1, @@ -30,7 +34,9 @@ def __init__( exclude_current_positions_from_observation ) - mujoco_env.MujocoEnv.__init__(self, xml_file, 5, mujoco_bindings="mujoco_py") + mujoco_env.MujocoEnv.__init__( + self, xml_file, 5, render_mode=render_mode, mujoco_bindings="mujoco_py" + ) def control_cost(self, action): control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) @@ -46,6 +52,8 @@ def step(self, action): forward_reward = self._forward_reward_weight * x_velocity + self.renderer.render_step() + observation = self._get_obs() reward = forward_reward - ctrl_cost done = False diff --git a/gym/envs/mujoco/half_cheetah_v4.py b/gym/envs/mujoco/half_cheetah_v4.py index f27f6b45925..a89aa770bfb 100644 --- a/gym/envs/mujoco/half_cheetah_v4.py +++ b/gym/envs/mujoco/half_cheetah_v4.py @@ -1,4 +1,7 @@ __credits__ = ["Rushiv Arora"] + +from typing import Optional + import numpy as np from gym import utils @@ -148,6 +151,7 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__( self, + render_mode: Optional[str] = None, xml_file="half_cheetah.xml", forward_reward_weight=1.0, ctrl_cost_weight=0.1, @@ -166,7 +170,7 @@ def __init__( exclude_current_positions_from_observation ) - mujoco_env.MujocoEnv.__init__(self, xml_file, 5) + mujoco_env.MujocoEnv.__init__(self, xml_file, 5, render_mode=render_mode) def control_cost(self, action): control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) @@ -192,6 +196,7 @@ def step(self, action): "reward_ctrl": -ctrl_cost, } + self.renderer.render_step() return observation, reward, done, info def _get_obs(self): diff --git a/gym/envs/mujoco/hopper.py b/gym/envs/mujoco/hopper.py index 34c69be720f..f1c5469964d 100644 --- a/gym/envs/mujoco/hopper.py +++ b/gym/envs/mujoco/hopper.py @@ -1,3 +1,5 @@ +from typing import 
Optional + import numpy as np from gym import utils @@ -5,9 +7,9 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle): - def __init__(self): + def __init__(self, render_mode: Optional[str] = None): mujoco_env.MujocoEnv.__init__( - self, "hopper.xml", 4, mujoco_bindings="mujoco_py" + self, "hopper.xml", 4, render_mode=render_mode, mujoco_bindings="mujoco_py" ) utils.EzPickle.__init__(self) @@ -15,6 +17,9 @@ def step(self, a): posbefore = self.sim.data.qpos[0] self.do_simulation(a, self.frame_skip) posafter, height, ang = self.sim.data.qpos[0:3] + + self.renderer.render_step() + alive_bonus = 1.0 reward = (posafter - posbefore) / self.dt reward += alive_bonus diff --git a/gym/envs/mujoco/hopper_v3.py b/gym/envs/mujoco/hopper_v3.py index cd16b07bc91..37c6fca738c 100644 --- a/gym/envs/mujoco/hopper_v3.py +++ b/gym/envs/mujoco/hopper_v3.py @@ -1,5 +1,7 @@ __credits__ = ["Rushiv Arora"] +from typing import Optional + import numpy as np from gym import utils @@ -16,6 +18,7 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__( self, + render_mode: Optional[str] = None, xml_file="hopper.xml", forward_reward_weight=1.0, ctrl_cost_weight=1e-3, @@ -46,7 +49,9 @@ def __init__( exclude_current_positions_from_observation ) - mujoco_env.MujocoEnv.__init__(self, xml_file, 4, mujoco_bindings="mujoco_py") + mujoco_env.MujocoEnv.__init__( + self, xml_file, 4, render_mode=render_mode, mujoco_bindings="mujoco_py" + ) @property def healthy_reward(self): @@ -105,6 +110,8 @@ def step(self, action): rewards = forward_reward + healthy_reward costs = ctrl_cost + self.renderer.render_step() + observation = self._get_obs() reward = rewards - costs done = self.done diff --git a/gym/envs/mujoco/hopper_v4.py b/gym/envs/mujoco/hopper_v4.py index 776cedf63eb..8062ead984a 100644 --- a/gym/envs/mujoco/hopper_v4.py +++ b/gym/envs/mujoco/hopper_v4.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -140,6 +142,7 @@ class 
HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__( self, + render_mode: Optional[str] = None, xml_file="hopper.xml", forward_reward_weight=1.0, ctrl_cost_weight=1e-3, @@ -170,7 +173,7 @@ def __init__( exclude_current_positions_from_observation ) - mujoco_env.MujocoEnv.__init__(self, xml_file, 4) + mujoco_env.MujocoEnv.__init__(self, xml_file, 4, render_mode=render_mode) @property def healthy_reward(self): @@ -237,6 +240,7 @@ def step(self, action): "x_velocity": x_velocity, } + self.renderer.render_step() return observation, reward, done, info def reset_model(self): diff --git a/gym/envs/mujoco/humanoid.py b/gym/envs/mujoco/humanoid.py index d69c08eeb99..1e2974b49b3 100644 --- a/gym/envs/mujoco/humanoid.py +++ b/gym/envs/mujoco/humanoid.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -11,9 +13,13 @@ def mass_center(model, sim): class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle): - def __init__(self): + def __init__(self, render_mode: Optional[str] = None): mujoco_env.MujocoEnv.__init__( - self, "humanoid.xml", 5, mujoco_bindings="mujoco_py" + self, + "humanoid.xml", + 5, + render_mode=render_mode, + mujoco_bindings="mujoco_py", ) utils.EzPickle.__init__(self) @@ -34,6 +40,9 @@ def step(self, a): pos_before = mass_center(self.model, self.sim) self.do_simulation(a, self.frame_skip) pos_after = mass_center(self.model, self.sim) + + self.renderer.render_step() + alive_bonus = 5.0 data = self.sim.data lin_vel_cost = 1.25 * (pos_after - pos_before) / self.dt diff --git a/gym/envs/mujoco/humanoid_v3.py b/gym/envs/mujoco/humanoid_v3.py index b1daaeaf711..6be18ceed55 100644 --- a/gym/envs/mujoco/humanoid_v3.py +++ b/gym/envs/mujoco/humanoid_v3.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -20,6 +22,7 @@ def mass_center(model, sim): class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__( self, + render_mode: Optional[str] = None, 
xml_file="humanoid.xml", forward_reward_weight=1.25, ctrl_cost_weight=0.1, @@ -47,7 +50,9 @@ def __init__( exclude_current_positions_from_observation ) - mujoco_env.MujocoEnv.__init__(self, xml_file, 5, mujoco_bindings="mujoco_py") + mujoco_env.MujocoEnv.__init__( + self, xml_file, 5, render_mode=render_mode, mujoco_bindings="mujoco_py" + ) @property def healthy_reward(self): @@ -121,6 +126,8 @@ def step(self, action): rewards = forward_reward + healthy_reward costs = ctrl_cost + contact_cost + self.renderer.render_step() + observation = self._get_obs() reward = rewards - costs done = self.done diff --git a/gym/envs/mujoco/humanoid_v4.py b/gym/envs/mujoco/humanoid_v4.py index 18e75f54765..101c94d7e7d 100644 --- a/gym/envs/mujoco/humanoid_v4.py +++ b/gym/envs/mujoco/humanoid_v4.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -204,6 +206,7 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__( self, + render_mode: Optional[str] = None, xml_file="humanoid.xml", forward_reward_weight=1.25, ctrl_cost_weight=0.1, @@ -227,7 +230,7 @@ def __init__( exclude_current_positions_from_observation ) - mujoco_env.MujocoEnv.__init__(self, xml_file, 5) + mujoco_env.MujocoEnv.__init__(self, xml_file, 5, render_mode=render_mode) @property def healthy_reward(self): @@ -306,6 +309,7 @@ def step(self, action): "forward_reward": forward_reward, } + self.renderer.render_step() return observation, reward, done, info def reset_model(self): diff --git a/gym/envs/mujoco/humanoidstandup.py b/gym/envs/mujoco/humanoidstandup.py index 217917b9b09..2af7c4476a4 100644 --- a/gym/envs/mujoco/humanoidstandup.py +++ b/gym/envs/mujoco/humanoidstandup.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -5,9 +7,13 @@ class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle): - def __init__(self): + def __init__(self, render_mode: Optional[str] = None): mujoco_env.MujocoEnv.__init__( - self, 
"humanoidstandup.xml", 5, mujoco_bindings="mujoco_py" + self, + "humanoidstandup.xml", + 5, + render_mode=render_mode, + mujoco_bindings="mujoco_py", ) utils.EzPickle.__init__(self) @@ -35,6 +41,8 @@ def step(self, a): quad_impact_cost = min(quad_impact_cost, 10) reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1 + self.renderer.render_step() + done = bool(False) return ( self._get_obs(), diff --git a/gym/envs/mujoco/humanoidstandup_v4.py b/gym/envs/mujoco/humanoidstandup_v4.py index fd1611182cf..b17c7734ed2 100644 --- a/gym/envs/mujoco/humanoidstandup_v4.py +++ b/gym/envs/mujoco/humanoidstandup_v4.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -190,8 +192,10 @@ class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle): """ - def __init__(self): - mujoco_env.MujocoEnv.__init__(self, "humanoidstandup.xml", 5) + def __init__(self, render_mode: Optional[str] = None): + mujoco_env.MujocoEnv.__init__( + self, "humanoidstandup.xml", 5, render_mode=render_mode + ) utils.EzPickle.__init__(self) def _get_obs(self): @@ -218,6 +222,8 @@ def step(self, a): quad_impact_cost = min(quad_impact_cost, 10) reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1 + self.renderer.render_step() + done = bool(False) return ( self._get_obs(), diff --git a/gym/envs/mujoco/inverted_double_pendulum.py b/gym/envs/mujoco/inverted_double_pendulum.py index 2f241c02b43..7e96170336c 100644 --- a/gym/envs/mujoco/inverted_double_pendulum.py +++ b/gym/envs/mujoco/inverted_double_pendulum.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -5,14 +7,21 @@ class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): - def __init__(self): + def __init__(self, render_mode: Optional[str] = None): mujoco_env.MujocoEnv.__init__( - self, "inverted_double_pendulum.xml", 5, mujoco_bindings="mujoco_py" + self, + "inverted_double_pendulum.xml", + 5, + render_mode=render_mode, + 
mujoco_bindings="mujoco_py", ) utils.EzPickle.__init__(self) def step(self, action): self.do_simulation(action, self.frame_skip) + + self.renderer.render_step() + ob = self._get_obs() x, _, y = self.sim.data.site_xpos[0] dist_penalty = 0.01 * x**2 + (y - 2) ** 2 diff --git a/gym/envs/mujoco/inverted_double_pendulum_v4.py b/gym/envs/mujoco/inverted_double_pendulum_v4.py index ca5fafbbe96..73bc9aed9f9 100644 --- a/gym/envs/mujoco/inverted_double_pendulum_v4.py +++ b/gym/envs/mujoco/inverted_double_pendulum_v4.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -118,8 +120,10 @@ class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): """ - def __init__(self): - mujoco_env.MujocoEnv.__init__(self, "inverted_double_pendulum.xml", 5) + def __init__(self, render_mode: Optional[str] = None): + mujoco_env.MujocoEnv.__init__( + self, "inverted_double_pendulum.xml", 5, render_mode=render_mode + ) utils.EzPickle.__init__(self) def step(self, action): @@ -132,6 +136,9 @@ def step(self, action): alive_bonus = 10 r = alive_bonus - dist_penalty - vel_penalty done = bool(y <= 1) + + self.renderer.render_step() + return ob, r, done, {} def _get_obs(self): diff --git a/gym/envs/mujoco/inverted_pendulum.py b/gym/envs/mujoco/inverted_pendulum.py index 342404b5e91..8f983e2f93d 100644 --- a/gym/envs/mujoco/inverted_pendulum.py +++ b/gym/envs/mujoco/inverted_pendulum.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -5,15 +7,22 @@ class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): - def __init__(self): + def __init__(self, render_mode: Optional[str] = None): utils.EzPickle.__init__(self) mujoco_env.MujocoEnv.__init__( - self, "inverted_pendulum.xml", 2, mujoco_bindings="mujoco_py" + self, + "inverted_pendulum.xml", + 2, + render_mode=render_mode, + mujoco_bindings="mujoco_py", ) def step(self, a): reward = 1.0 self.do_simulation(a, self.frame_skip) + + 
self.renderer.render_step() + ob = self._get_obs() notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= 0.2) done = not notdone diff --git a/gym/envs/mujoco/inverted_pendulum_v4.py b/gym/envs/mujoco/inverted_pendulum_v4.py index 0044b99f41e..71bc846d75e 100644 --- a/gym/envs/mujoco/inverted_pendulum_v4.py +++ b/gym/envs/mujoco/inverted_pendulum_v4.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -90,9 +92,11 @@ class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): """ - def __init__(self): + def __init__(self, render_mode: Optional[str] = None): utils.EzPickle.__init__(self) - mujoco_env.MujocoEnv.__init__(self, "inverted_pendulum.xml", 2) + mujoco_env.MujocoEnv.__init__( + self, "inverted_pendulum.xml", 2, render_mode=render_mode + ) def step(self, a): reward = 1.0 @@ -100,6 +104,9 @@ def step(self, a): ob = self._get_obs() notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= 0.2) done = not notdone + + self.renderer.render_step() + return ob, reward, done, {} def reset_model(self): diff --git a/gym/envs/mujoco/mujoco_env.py b/gym/envs/mujoco/mujoco_env.py index 00aa89207fc..713b29fa797 100644 --- a/gym/envs/mujoco/mujoco_env.py +++ b/gym/envs/mujoco/mujoco_env.py @@ -6,6 +6,7 @@ import gym from gym import error, logger, spaces +from gym.utils.renderer import Renderer DEFAULT_SIZE = 480 @@ -33,8 +34,13 @@ def convert_observation_to_space(observation): class MujocoEnv(gym.Env): """Superclass for all MuJoCo environments.""" - def __init__(self, model_path, frame_skip, mujoco_bindings="mujoco"): - + def __init__( + self, + model_path, + frame_skip, + render_mode: Optional[str] = None, + mujoco_bindings="mujoco", + ): if model_path.startswith("/"): fullpath = model_path else: @@ -87,12 +93,22 @@ def __init__(self, model_path, frame_skip, mujoco_bindings="mujoco"): self.viewer = None self.metadata = { - "render_modes": ["human", "rgb_array", "depth_array"], + "render_modes": [ + "human", + "rgb_array", + 
"depth_array", + "single_rgb_array", + "single_depth_array", + ], "render_fps": int(np.round(1.0 / self.dt)), } self._set_action_space() + assert render_mode is None or render_mode in self.metadata["render_modes"] + self.render_mode = render_mode + self.renderer = Renderer(self.render_mode, self._render) + action = self.action_space.sample() observation, _reward, done, _info = self.step(action) assert not done @@ -142,6 +158,8 @@ def reset( self._mujoco_bindings.mj_resetData(self.model, self.data) ob = self.reset_model() + self.renderer.reset() + self.renderer.render_step() if not return_info: return ob else: @@ -195,7 +213,33 @@ def render( camera_id=None, camera_name=None, ): - if mode == "rgb_array" or mode == "depth_array": + if self.render_mode is not None: + return self.renderer.get_renders() + else: + return self._render( + mode=mode, + width=width, + height=height, + camera_id=camera_id, + camera_name=camera_name, + ) + + def _render( + self, + mode="human", + width=DEFAULT_SIZE, + height=DEFAULT_SIZE, + camera_id=None, + camera_name=None, + ): + assert mode in self.metadata["render_modes"] + + if mode in { + "rgb_array", + "single_rgb_array", + "depth_array", + "single_depth_array", + }: if camera_id is not None and camera_name is not None: raise ValueError( "Both `camera_id` and `camera_name` cannot be" @@ -219,11 +263,11 @@ def render( self._get_viewer(mode).render(width, height, camera_id=camera_id) - if mode == "rgb_array": + if mode in {"rgb_array", "single_rgb_array"}: data = self._get_viewer(mode).read_pixels(width, height, depth=False) # original image is upside-down, so flip it return data[::-1, :, :] - elif mode == "depth_array": + elif mode in {"depth_array", "single_depth_array"}: self._get_viewer(mode).render(width, height) # Extract depth part of the read_pixels() tuple data = self._get_viewer(mode).read_pixels(width, height, depth=True)[1] @@ -249,7 +293,12 @@ def _get_viewer(self, mode, width=DEFAULT_SIZE, height=DEFAULT_SIZE): from 
gym.envs.mujoco.mujoco_rendering import Viewer self.viewer = Viewer(self.model, self.data) - elif mode == "rgb_array" or mode == "depth_array": + elif mode in { + "rgb_array", + "depth_array", + "single_rgb_array", + "single_depth_array", + }: if self._mujoco_bindings.__name__ == "mujoco_py": self.viewer = self._mujoco_bindings.MjRenderContextOffscreen( self.sim, -1 diff --git a/gym/envs/mujoco/pusher.py b/gym/envs/mujoco/pusher.py index ee6da42be76..1d7ee764a43 100644 --- a/gym/envs/mujoco/pusher.py +++ b/gym/envs/mujoco/pusher.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -5,10 +7,10 @@ class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle): - def __init__(self): + def __init__(self, render_mode: Optional[str] = None): utils.EzPickle.__init__(self) mujoco_env.MujocoEnv.__init__( - self, "pusher.xml", 5, mujoco_bindings="mujoco_py" + self, "pusher.xml", 5, render_mode=render_mode, mujoco_bindings="mujoco_py" ) def step(self, a): @@ -21,6 +23,9 @@ def step(self, a): reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near self.do_simulation(a, self.frame_skip) + + self.renderer.render_step() + ob = self._get_obs() done = False return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl) diff --git a/gym/envs/mujoco/pusher_v4.py b/gym/envs/mujoco/pusher_v4.py index 709ab08c0d2..c8fede6844c 100644 --- a/gym/envs/mujoco/pusher_v4.py +++ b/gym/envs/mujoco/pusher_v4.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -132,9 +134,9 @@ class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle): """ - def __init__(self): + def __init__(self, render_mode: Optional[str] = None): utils.EzPickle.__init__(self) - mujoco_env.MujocoEnv.__init__(self, "pusher.xml", 5) + mujoco_env.MujocoEnv.__init__(self, "pusher.xml", 5, render_mode=render_mode) def step(self, a): vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm") @@ -148,6 +150,9 @@ def step(self, a): 
self.do_simulation(a, self.frame_skip) ob = self._get_obs() done = False + + self.renderer.render_step() + return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl) def viewer_setup(self): diff --git a/gym/envs/mujoco/reacher.py b/gym/envs/mujoco/reacher.py index 5fd1834bcc5..79401a8ea7e 100644 --- a/gym/envs/mujoco/reacher.py +++ b/gym/envs/mujoco/reacher.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -5,10 +7,10 @@ class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle): - def __init__(self): + def __init__(self, render_mode: Optional[str] = None): utils.EzPickle.__init__(self) mujoco_env.MujocoEnv.__init__( - self, "reacher.xml", 2, mujoco_bindings="mujoco_py" + self, "reacher.xml", 2, render_mode=render_mode, mujoco_bindings="mujoco_py" ) def step(self, a): @@ -17,6 +19,9 @@ def step(self, a): reward_ctrl = -np.square(a).sum() reward = reward_dist + reward_ctrl self.do_simulation(a, self.frame_skip) + + self.renderer.render_step() + ob = self._get_obs() done = False return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl) diff --git a/gym/envs/mujoco/reacher_v4.py b/gym/envs/mujoco/reacher_v4.py index bea7180fe00..e055f4f89ea 100644 --- a/gym/envs/mujoco/reacher_v4.py +++ b/gym/envs/mujoco/reacher_v4.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -122,9 +124,9 @@ class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle): """ - def __init__(self): + def __init__(self, render_mode: Optional[str] = None): utils.EzPickle.__init__(self) - mujoco_env.MujocoEnv.__init__(self, "reacher.xml", 2) + mujoco_env.MujocoEnv.__init__(self, "reacher.xml", 2, render_mode=render_mode) def step(self, a): vec = self.get_body_com("fingertip") - self.get_body_com("target") @@ -134,6 +136,9 @@ def step(self, a): self.do_simulation(a, self.frame_skip) ob = self._get_obs() done = False + + self.renderer.render_step() + return ob, reward, done, 
dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl) def viewer_setup(self): diff --git a/gym/envs/mujoco/swimmer.py b/gym/envs/mujoco/swimmer.py index bfa6a8895fc..71bf9b98501 100644 --- a/gym/envs/mujoco/swimmer.py +++ b/gym/envs/mujoco/swimmer.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -5,9 +7,9 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle): - def __init__(self): + def __init__(self, render_mode: Optional[str] = None): mujoco_env.MujocoEnv.__init__( - self, "swimmer.xml", 4, mujoco_bindings="mujoco_py" + self, "swimmer.xml", 4, render_mode=render_mode, mujoco_bindings="mujoco_py" ) utils.EzPickle.__init__(self) @@ -16,6 +18,9 @@ def step(self, a): xposbefore = self.sim.data.qpos[0] self.do_simulation(a, self.frame_skip) xposafter = self.sim.data.qpos[0] + + self.renderer.render_step() + reward_fwd = (xposafter - xposbefore) / self.dt reward_ctrl = -ctrl_cost_coeff * np.square(a).sum() reward = reward_fwd + reward_ctrl diff --git a/gym/envs/mujoco/swimmer_v3.py b/gym/envs/mujoco/swimmer_v3.py index 18f848c3678..ff9dc579107 100644 --- a/gym/envs/mujoco/swimmer_v3.py +++ b/gym/envs/mujoco/swimmer_v3.py @@ -1,5 +1,7 @@ __credits__ = ["Rushiv Arora"] +from typing import Optional + import numpy as np from gym import utils @@ -11,6 +13,7 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__( self, + render_mode: Optional[str] = None, xml_file="swimmer.xml", forward_reward_weight=1.0, ctrl_cost_weight=1e-4, @@ -28,7 +31,9 @@ def __init__( exclude_current_positions_from_observation ) - mujoco_env.MujocoEnv.__init__(self, xml_file, 4, mujoco_bindings="mujoco_py") + mujoco_env.MujocoEnv.__init__( + self, xml_file, 4, render_mode=render_mode, mujoco_bindings="mujoco_py" + ) def control_cost(self, action): control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) @@ -39,11 +44,12 @@ def step(self, action): self.do_simulation(action, self.frame_skip) xy_position_after = 
self.sim.data.qpos[0:2].copy() + self.renderer.render_step() + xy_velocity = (xy_position_after - xy_position_before) / self.dt x_velocity, y_velocity = xy_velocity forward_reward = self._forward_reward_weight * x_velocity - ctrl_cost = self.control_cost(action) observation = self._get_obs() diff --git a/gym/envs/mujoco/swimmer_v4.py b/gym/envs/mujoco/swimmer_v4.py index aa499d14362..5dfee5aa9eb 100644 --- a/gym/envs/mujoco/swimmer_v4.py +++ b/gym/envs/mujoco/swimmer_v4.py @@ -1,5 +1,7 @@ __credits__ = ["Rushiv Arora"] +from typing import Optional + import numpy as np from gym import utils @@ -132,6 +134,7 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__( self, + render_mode: Optional[str] = None, xml_file="swimmer.xml", forward_reward_weight=1.0, ctrl_cost_weight=1e-4, @@ -149,7 +152,7 @@ def __init__( exclude_current_positions_from_observation ) - mujoco_env.MujocoEnv.__init__(self, xml_file, 4) + mujoco_env.MujocoEnv.__init__(self, xml_file, 4, render_mode=render_mode) def control_cost(self, action): control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) @@ -181,6 +184,7 @@ def step(self, action): "forward_reward": forward_reward, } + self.renderer.render_step() return observation, reward, done, info def _get_obs(self): diff --git a/gym/envs/mujoco/walker2d.py b/gym/envs/mujoco/walker2d.py index fc35d633bd0..ef4ece1cc31 100644 --- a/gym/envs/mujoco/walker2d.py +++ b/gym/envs/mujoco/walker2d.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -5,9 +7,13 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle): - def __init__(self): + def __init__(self, render_mode: Optional[str] = None): mujoco_env.MujocoEnv.__init__( - self, "walker2d.xml", 4, mujoco_bindings="mujoco_py" + self, + "walker2d.xml", + 4, + render_mode=render_mode, + mujoco_bindings="mujoco_py", ) utils.EzPickle.__init__(self) @@ -15,12 +21,16 @@ def step(self, a): posbefore = self.sim.data.qpos[0] self.do_simulation(a, 
self.frame_skip) posafter, height, ang = self.sim.data.qpos[0:3] + + self.renderer.render_step() + alive_bonus = 1.0 reward = (posafter - posbefore) / self.dt reward += alive_bonus reward -= 1e-3 * np.square(a).sum() done = not (height > 0.8 and height < 2.0 and ang > -1.0 and ang < 1.0) ob = self._get_obs() + return ob, reward, done, {} def _get_obs(self): diff --git a/gym/envs/mujoco/walker2d_v3.py b/gym/envs/mujoco/walker2d_v3.py index a03ef972453..92952229464 100644 --- a/gym/envs/mujoco/walker2d_v3.py +++ b/gym/envs/mujoco/walker2d_v3.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -14,6 +16,7 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__( self, + render_mode: Optional[str] = None, xml_file="walker2d.xml", forward_reward_weight=1.0, ctrl_cost_weight=1e-3, @@ -23,6 +26,7 @@ def __init__( healthy_angle_range=(-1.0, 1.0), reset_noise_scale=5e-3, exclude_current_positions_from_observation=True, + **kwargs ): utils.EzPickle.__init__(**locals()) @@ -41,7 +45,9 @@ def __init__( exclude_current_positions_from_observation ) - mujoco_env.MujocoEnv.__init__(self, xml_file, 4, mujoco_bindings="mujoco_py") + mujoco_env.MujocoEnv.__init__( + self, xml_file, 4, render_mode=render_mode, mujoco_bindings="mujoco_py" + ) @property def healthy_reward(self): @@ -88,8 +94,9 @@ def step(self, action): x_position_after = self.sim.data.qpos[0] x_velocity = (x_position_after - x_position_before) / self.dt - ctrl_cost = self.control_cost(action) + self.renderer.render_step() + ctrl_cost = self.control_cost(action) forward_reward = self._forward_reward_weight * x_velocity healthy_reward = self.healthy_reward diff --git a/gym/envs/mujoco/walker2d_v4.py b/gym/envs/mujoco/walker2d_v4.py index e3085d8a121..f778e5f998b 100644 --- a/gym/envs/mujoco/walker2d_v4.py +++ b/gym/envs/mujoco/walker2d_v4.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from gym import utils @@ -158,6 +160,7 @@ class 
Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__( self, + render_mode: Optional[str] = None, xml_file="walker2d.xml", forward_reward_weight=1.0, ctrl_cost_weight=1e-3, @@ -185,7 +188,7 @@ def __init__( exclude_current_positions_from_observation ) - mujoco_env.MujocoEnv.__init__(self, xml_file, 4) + mujoco_env.MujocoEnv.__init__(self, xml_file, 4, render_mode=render_mode) @property def healthy_reward(self): @@ -248,6 +251,7 @@ def step(self, action): "x_velocity": x_velocity, } + self.renderer.render_step() return observation, reward, done, info def reset_model(self): diff --git a/gym/envs/toy_text/blackjack.py b/gym/envs/toy_text/blackjack.py index dc77a933891..60fed098d99 100644 --- a/gym/envs/toy_text/blackjack.py +++ b/gym/envs/toy_text/blackjack.py @@ -6,6 +6,7 @@ import gym from gym import spaces from gym.error import DependencyNotInstalled +from gym.utils.renderer import Renderer def cmp(a, b): @@ -110,9 +111,12 @@ class BlackjackEnv(gym.Env): * v0: Initial versions release (1.0.0) """ - metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4} + metadata = { + "render_modes": ["human", "rgb_array", "single_rgb_array"], + "render_fps": 4, + } - def __init__(self, natural=False, sab=False): + def __init__(self, render_mode: Optional[str] = None, natural=False, sab=False): self.action_space = spaces.Discrete(2) self.observation_space = spaces.Tuple( (spaces.Discrete(32), spaces.Discrete(11), spaces.Discrete(2)) @@ -125,6 +129,10 @@ def __init__(self, natural=False, sab=False): # Flag for full agreement with the (Sutton and Barto, 2018) definition. 
Overrides self.natural self.sab = sab + assert render_mode is None or render_mode in self.metadata["render_modes"] + self.render_mode = render_mode + self.renderer = Renderer(self.render_mode, self._render) + def step(self, action): assert self.action_space.contains(action) if action: # hit: add a card to players hand and return @@ -151,6 +159,8 @@ def step(self, action): ): # Natural gives extra points, but doesn't autowin. Legacy implementation reward = 1.5 + + self.renderer.render_step() return self._get_obs(), reward, done, {} def _get_obs(self): @@ -165,12 +175,24 @@ def reset( super().reset(seed=seed) self.dealer = draw_hand(self.np_random) self.player = draw_hand(self.np_random) + + self.renderer.reset() + self.renderer.render_step() + if not return_info: return self._get_obs() else: return self._get_obs(), {} def render(self, mode="human"): + if self.render_mode is not None: + return self.renderer.get_renders() + else: + return self._render(mode) + + def _render(self, mode): + assert mode in self.metadata["render_modes"] + try: import pygame except ImportError: diff --git a/gym/envs/toy_text/cliffwalking.py b/gym/envs/toy_text/cliffwalking.py index 4c663a90c71..fc77f291a07 100644 --- a/gym/envs/toy_text/cliffwalking.py +++ b/gym/envs/toy_text/cliffwalking.py @@ -7,6 +7,7 @@ from gym import Env, spaces from gym.envs.toy_text.utils import categorical_sample +from gym.utils.renderer import Renderer UP = 0 RIGHT = 1 @@ -62,7 +63,7 @@ class CliffWalkingEnv(Env): metadata = {"render_modes": ["human", "ansi"], "render_fps": 4} - def __init__(self): + def __init__(self, render_mode: Optional[str] = None): self.shape = (4, 12) self.start_state_index = np.ravel_multi_index((3, 0), self.shape) @@ -91,6 +92,10 @@ def __init__(self): self.observation_space = spaces.Discrete(self.nS) self.action_space = spaces.Discrete(self.nA) + assert render_mode is None or render_mode in self.metadata["render_modes"] + self.render_mode = render_mode + self.renderer = 
Renderer(self.render_mode, self._render) + def _limit_coordinates(self, coord: np.ndarray) -> np.ndarray: """Prevent the agent from falling out of the grid world.""" coord[0] = min(coord[0], self.shape[0] - 1) @@ -125,6 +130,7 @@ def step(self, a): p, s, r, d = transitions[i] self.s = s self.lastaction = a + self.renderer.render_step() return (int(s), r, d, {"prob": p}) def reset( @@ -137,12 +143,21 @@ def reset( super().reset(seed=seed) self.s = categorical_sample(self.initial_state_distrib, self.np_random) self.lastaction = None + self.renderer.reset() + self.renderer.render_step() if not return_info: return int(self.s) else: return int(self.s), {"prob": 1} def render(self, mode="human"): + if self.render_mode is not None: + return self.renderer.get_renders() + else: + return self._render(mode) + + def _render(self, mode): + assert mode in self.metadata["render_modes"] outfile = StringIO() if mode == "ansi" else sys.stdout for s in range(self.nS): diff --git a/gym/envs/toy_text/frozen_lake.py b/gym/envs/toy_text/frozen_lake.py index 1c87dfa5e3c..26c64c88659 100644 --- a/gym/envs/toy_text/frozen_lake.py +++ b/gym/envs/toy_text/frozen_lake.py @@ -8,6 +8,7 @@ from gym import Env, spaces, utils from gym.envs.toy_text.utils import categorical_sample from gym.error import DependencyNotInstalled +from gym.utils.renderer import Renderer LEFT = 0 DOWN = 1 @@ -144,9 +145,18 @@ class FrozenLakeEnv(Env): * v0: Initial versions release (1.0.0) """ - metadata = {"render_modes": ["human", "ansi", "rgb_array"], "render_fps": 4} + metadata = { + "render_modes": ["human", "ansi", "rgb_array", "single_rgb_array"], + "render_fps": 4, + } - def __init__(self, desc=None, map_name="4x4", is_slippery=True): + def __init__( + self, + render_mode: Optional[str] = None, + desc=None, + map_name="4x4", + is_slippery=True, + ): if desc is None and map_name is None: desc = generate_random_map() elif desc is None: @@ -205,6 +215,10 @@ def update_probability_matrix(row, col, action): 
self.observation_space = spaces.Discrete(nS) self.action_space = spaces.Discrete(nA) + assert render_mode is None or render_mode in self.metadata["render_modes"] + self.render_mode = render_mode + self.renderer = Renderer(self.render_mode, self._render) + # pygame utils self.window_size = (min(64 * ncol, 512), min(64 * nrow, 512)) self.window_surface = None @@ -222,6 +236,9 @@ def step(self, a): p, s, r, d = transitions[i] self.s = s self.lastaction = a + + self.renderer.render_step() + return (int(s), r, d, {"prob": p}) def reset( @@ -235,19 +252,28 @@ def reset( self.s = categorical_sample(self.initial_state_distrib, self.np_random) self.lastaction = None + self.renderer.reset() + self.renderer.render_step() + if not return_info: return int(self.s) else: return int(self.s), {"prob": 1} def render(self, mode="human"): - desc = self.desc.tolist() - if mode == "ansi": - return self._render_text(desc) + if self.render_mode is not None: + return self.renderer.get_renders() else: - return self._render_gui(desc, mode) + return self._render(mode) + + def _render(self, mode="human"): + assert mode in self.metadata["render_modes"] + if mode == "ansi": + return self._render_text() + elif mode in {"human", "rgb_array", "single_rgb_array"}: + return self._render_gui(mode) - def _render_gui(self, desc, mode): + def _render_gui(self, mode): try: import pygame except ImportError: @@ -261,7 +287,7 @@ def _render_gui(self, desc, mode): pygame.display.set_caption("Frozen Lake") if mode == "human": self.window_surface = pygame.display.set_mode(self.window_size) - else: # rgb_array + elif mode in {"rgb_array", "single_rgb_array"}: self.window_surface = pygame.Surface(self.window_size) if self.clock is None: self.clock = pygame.time.Clock() @@ -315,6 +341,7 @@ def _render_gui(self, desc, mode): goal_img = pygame.transform.scale(self.goal_img, (cell_width, cell_height)) start_img = pygame.transform.scale(self.start_img, (small_cell_w, small_cell_h)) + desc = self.desc.tolist() for y in 
range(self.nrow): for x in range(self.ncol): rect = (x * cell_width, y * cell_height, cell_width, cell_height) @@ -351,7 +378,7 @@ def _render_gui(self, desc, mode): pygame.event.pump() pygame.display.update() self.clock.tick(self.metadata["render_fps"]) - else: # rgb_array + elif mode in {"rgb_array", "single_rgb_array"}: return np.transpose( np.array(pygame.surfarray.pixels3d(self.window_surface)), axes=(1, 0, 2) ) @@ -365,7 +392,8 @@ def _center_small_rect(big_rect, small_dims): big_rect[1] + offset_h, ) - def _render_text(self, desc): + def _render_text(self): + desc = self.desc.tolist() outfile = StringIO() row, col = self.s // self.ncol, self.s % self.ncol diff --git a/gym/envs/toy_text/taxi.py b/gym/envs/toy_text/taxi.py index 72ba4aff731..acb9dafab9b 100644 --- a/gym/envs/toy_text/taxi.py +++ b/gym/envs/toy_text/taxi.py @@ -8,6 +8,7 @@ from gym import Env, spaces, utils from gym.envs.toy_text.utils import categorical_sample from gym.error import DependencyNotInstalled +from gym.utils.renderer import Renderer MAP = [ "+---------+", @@ -104,9 +105,12 @@ class TaxiEnv(Env): * v0: Initial versions release """ - metadata = {"render_modes": ["human", "ansi", "rgb_array"], "render_fps": 4} + metadata = { + "render_modes": ["human", "ansi", "rgb_array", "single_rgb_array"], + "render_fps": 4, + } - def __init__(self): + def __init__(self, render_mode: Optional[str] = None): self.desc = np.asarray(MAP, dtype="c") self.locs = locs = [(0, 0), (0, 4), (4, 0), (4, 3)] @@ -169,6 +173,10 @@ def __init__(self): self.action_space = spaces.Discrete(num_actions) self.observation_space = spaces.Discrete(num_states) + assert render_mode is None or render_mode in self.metadata["render_modes"] + self.render_mode = render_mode + self.renderer = Renderer(self.render_mode, self._render) + # pygame utils self.window = None self.clock = None @@ -213,6 +221,7 @@ def step(self, a): p, s, r, d = transitions[i] self.s = s self.lastaction = a + self.renderer.render_step() return (int(s), 
r, d, {"prob": p}) def reset( @@ -226,15 +235,24 @@ def reset( self.s = categorical_sample(self.initial_state_distrib, self.np_random) self.lastaction = None self.taxi_orientation = 0 + self.renderer.reset() + self.renderer.render_step() if not return_info: return int(self.s) else: return int(self.s), {"prob": 1} def render(self, mode="human"): + if self.render_mode is not None: + return self.renderer.get_renders() + else: + return self._render(mode) + + def _render(self, mode): + assert mode in self.metadata["render_modes"] if mode == "ansi": return self._render_text() - else: + elif mode in {"human", "rgb_array", "single_rgb_array"}: return self._render_gui(mode) def _render_gui(self, mode): @@ -250,7 +268,7 @@ def _render_gui(self, mode): pygame.display.set_caption("Taxi") if mode == "human": self.window = pygame.display.set_mode(WINDOW_SIZE) - else: # "rgb_array" + elif mode in {"rgb_array", "single_rgb_array"}: self.window = pygame.Surface(WINDOW_SIZE) if self.clock is None: self.clock = pygame.time.Clock() @@ -358,7 +376,7 @@ def _render_gui(self, mode): if mode == "human": pygame.display.update() self.clock.tick(self.metadata["render_fps"]) - else: # rgb_array + elif mode in {"rgb_array", "single_rgb_array"}: return np.transpose( np.array(pygame.surfarray.pixels3d(self.window)), axes=(1, 0, 2) ) diff --git a/gym/utils/env_checker.py b/gym/utils/env_checker.py index e8d7fbeb2bb..9fe621ffaf9 100644 --- a/gym/utils/env_checker.py +++ b/gym/utils/env_checker.py @@ -170,37 +170,39 @@ def check_reset_options(env: gym.Env): ) -# Check render cannot be covered by CI -def check_render(env: gym.Env, headless: bool = False): - """Check the declared render modes/fps and the :meth:`render`/:meth:`close` method of the environment. +def check_render(env: gym.Env, warn: bool = True): + """Check the declared render modes/fps of the environment. Args: env: The environment to check - headless: Whether to disable render modes that require a graphical interface. 
False by default. + warn: Whether to output additional warnings """ render_modes = env.metadata.get("render_modes") if render_modes is None: - logger.warn( - "No render modes was declared in the environment (env.metadata['render_modes'] is None or not defined), you may have trouble when calling `.render()`" - ) + if warn: + logger.warn( + "No render modes was declared in the environment " + " (env.metadata['render_modes'] is None or not defined), " + "you may have trouble when calling `.render()`" + ) render_fps = env.metadata.get("render_fps") # We only require `render_fps` if rendering is actually implemented - if render_fps is None: - logger.warn( - "No render fps was declared in the environment (env.metadata['render_fps'] is None or not defined), rendering may occur at inconsistent fps" - ) - - if render_modes is not None: - # Don't check render mode that require a - # graphical interface (useful for CI) - if headless and "human" in render_modes: - render_modes.remove("human") + if render_fps is None and render_modes is not None and len(render_modes) > 0: + if warn: + logger.warn( + "No render fps was declared in the environment " + " (env.metadata['render_fps'] is None or not defined), " + "rendering may occur at inconsistent fps" + ) - # Check all declared render modes - for mode in render_modes: - env.render(mode=mode) - env.close() + if warn: + if not hasattr(env, "render_mode"): # TODO: raise an error with gym 1.0 + logger.warn("Environments must define render_mode attribute.") + elif env.render_mode is not None and env.render_mode not in render_modes: + logger.warn( + "The environment was initialized successfully with an unsupported render mode." 
+ ) def check_env(env: gym.Env, warn: bool = None, skip_render_check: bool = True): diff --git a/gym/utils/renderer.py b/gym/utils/renderer.py new file mode 100644 index 00000000000..cbf68579ce7 --- /dev/null +++ b/gym/utils/renderer.py @@ -0,0 +1,79 @@ +"""A utility class to collect render frames from a function that computes a single frame.""" +from typing import Any, Callable, List, Optional, Set + +# list of modes with which render function returns None +NO_RETURNS_RENDER = {"human"} + +# list of modes with which render returns just a single frame of the current state +SINGLE_RENDER = {"single_rgb_array", "single_depth_array", "single_state_pixels"} + + +class Renderer: + """This class serves to easily integrate collection of renders for environments that can computes a single render. + + To use this function: + - instantiate this class with the mode and the function that computes a single frame + - call render_step method each time the frame should be saved in the list + (usually at the end of the step and reset methods) + - call get_renders whenever you want to retrieve renders + (usually in the render method) + - call reset to clean the render list + (usually in the reset method of the environment) + """ + + def __init__( + self, + mode: Optional[str], + render: Callable[[str], Any], + no_returns_render: Optional[Set[str]] = None, + single_render: Optional[Set[str]] = None, + ): + """Instantiates a Renderer object. + + Args: + mode (Optional[str]): Way to render + render (Callable[[str], Any]): Function that receives the mode and computes a single frame + no_returns_render (Optional[Set[str]]): Set of render modes that don't return any value. + The default value is the set {"human"}. + single_render (Optional[Set[str]]): Set of render modes that should return a single frame. + The default value is the set {"single_rgb_array", "single_depth_array", "single_state_pixels"}. 
+        """
+        if no_returns_render is None:
+            no_returns_render = NO_RETURNS_RENDER
+        if single_render is None:
+            single_render = SINGLE_RENDER
+
+        self.no_returns_render = no_returns_render
+        self.single_render = single_render
+        self.mode = mode
+        self.render = render
+        self.render_list = []
+
+    def render_step(self) -> None:
+        """Computes a frame and saves it to the render collection list.
+
+        This method should be usually called inside environment's step and reset method.
+        """
+        if self.mode is not None and self.mode not in self.single_render:
+            render_return = self.render(self.mode)
+            if self.mode not in self.no_returns_render:
+                self.render_list.append(render_return)
+
+    def get_renders(self) -> Optional[List]:
+        """Pops all the frames from the render collection list.
+
+        This method should be usually called in the environment's render method to retrieve the frames collected till this time step.
+        """
+        if self.mode in self.single_render:
+            return self.render(self.mode)
+        elif self.mode is not None and self.mode not in self.no_returns_render:
+            renders = self.render_list
+            self.render_list = []
+            return renders
+
+    def reset(self):
+        """Resets the render collection list.
+
+        This method should be usually called inside environment's reset method.
+        """
+        self.render_list = []
diff --git a/gym/wrappers/monitoring/video_recorder.py b/gym/wrappers/monitoring/video_recorder.py
index 75b52a956a7..a67a334e461 100644
--- a/gym/wrappers/monitoring/video_recorder.py
+++ b/gym/wrappers/monitoring/video_recorder.py
@@ -7,7 +7,7 @@
 import subprocess
 import tempfile
 from io import StringIO
-from typing import Optional, Tuple, Union
+from typing import List, Optional, Tuple, Union
 
 import numpy as np
@@ -19,11 +19,16 @@ def touch(path: str):
     open(path, "a").close()
 
 
-class VideoRecorder:
+class VideoRecorder:  # TODO: remove with gym 1.0
     """VideoRecorder renders a nice movie of a rollout, frame by frame.
It comes with an ``enabled`` option, so you can still use the same code on episodes where you don't want to record video. + Note: + VideoRecorder is deprecated. + Collect the frames with render_mode='rgb_array' and use an external library like MoviePy: + https://zulko.github.io/moviepy/getting_started/videoclips.html#videoclip + Note: You are responsible for calling :meth:`close` on a created VideoRecorder, or else you may leak an encoder process. """ @@ -50,6 +55,11 @@ def __init__( Error: Invalid path given that must have a particular file extension """ modes = env.metadata.get("render_modes", []) + logger.deprecation( + "VideoRecorder is deprecated.\n" + "Collect the frames with render_mode='rgb_array' and use an external library like MoviePy: " + "https://zulko.github.io/moviepy/getting_started/videoclips.html#videoclip" + ) # backward-compatibility mode: backward_compatible_mode = env.metadata.get("render.modes", []) @@ -64,10 +74,6 @@ def __init__( self.enabled = enabled self._closed = False - # Don't bother setting anything else if not enabled - if not self.enabled: - return - self.ansi_mode = False if "rgb_array" not in modes: if "ansi" in modes: @@ -78,7 +84,10 @@ def __init__( ) # Whoops, turns out we shouldn't be enabled after all self.enabled = False - return + + # Don't bother setting anything else if not enabled + if not self.enabled: + return if path is not None and base_path is not None: raise error.Error("You can pass at most one of `path` or `base_path`.") @@ -171,6 +180,8 @@ def capture_frame(self): render_mode = "ansi" if self.ansi_mode else "rgb_array" frame = self.env.render(mode=render_mode) + if isinstance(frame, List): + frame = frame[-1] if frame is None: if self._async: diff --git a/gym/wrappers/pixel_observation.py b/gym/wrappers/pixel_observation.py index 8953d08b8ae..0cc6d72ed37 100644 --- a/gym/wrappers/pixel_observation.py +++ b/gym/wrappers/pixel_observation.py @@ -2,7 +2,7 @@ import collections import copy from collections.abc 
import MutableMapping -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple import numpy as np @@ -95,10 +95,6 @@ def __init__( for key in pixel_keys: render_kwargs.setdefault(key, {}) - render_mode = render_kwargs[key].pop("mode", "rgb_array") - assert render_mode == "rgb_array", render_mode - render_kwargs[key]["mode"] = "rgb_array" - wrapped_observation_space = env.observation_space if isinstance(wrapped_observation_space, spaces.Box): @@ -133,6 +129,7 @@ def __init__( pixels_spaces = {} for pixel_key in pixel_keys: pixels = self.env.render(**render_kwargs[pixel_key]) + pixels = pixels[-1] if isinstance(pixels, List) else pixels if np.issubdtype(pixels.dtype, np.integer): low, high = (0, 255) diff --git a/gym/wrappers/record_video.py b/gym/wrappers/record_video.py index 96d0d32651f..bcbde313482 100644 --- a/gym/wrappers/record_video.py +++ b/gym/wrappers/record_video.py @@ -24,7 +24,7 @@ def capped_cubic_video_schedule(episode_id: int) -> bool: return episode_id % 1000 == 0 -class RecordVideo(gym.Wrapper): +class RecordVideo(gym.Wrapper): # TODO: remove with gym 1.0 """This wrapper records videos of rollouts. Usually, you only want to record episodes intermittently, say every hundredth episode. @@ -35,6 +35,11 @@ class RecordVideo(gym.Wrapper): By default, the recording will be stopped once a `done` signal has been emitted by the environment. However, you can also create recordings of fixed length (possibly spanning several episodes) by passing a strictly positive value for ``video_length``. + + Note: + RecordVideo is deprecated. 
+ Collect the frames with render_mode='rgb_array' and use an external library like MoviePy: + https://zulko.github.io/moviepy/getting_started/videoclips.html#videoclip """ def __init__( @@ -58,6 +63,11 @@ def __init__( name_prefix (str): Will be prepended to the filename of the recordings """ super().__init__(env) + logger.deprecation( + "RecordVideo is deprecated.\n" + "Collect the frames with render_mode='rgb_array' and use an external library like MoviePy: " + "https://zulko.github.io/moviepy/getting_started/videoclips.html#videoclip" + ) if episode_trigger is None and step_trigger is None: episode_trigger = capped_cubic_video_schedule @@ -90,7 +100,13 @@ def __init__( def reset(self, **kwargs): """Reset the environment using kwargs and then starts recording if video enabled.""" observations = super().reset(**kwargs) - if not self.recording and self._video_enabled(): + if self.recording: + self.video_recorder.capture_frame() + self.recorded_frames += 1 + if self.video_length > 0: + if self.recorded_frames > self.video_length: + self.close_video_recorder() + elif self._video_enabled(): self.start_video_recorder() return observations diff --git a/tests/envs/test_envs.py b/tests/envs/test_envs.py index 1a174e15197..affaf0d1960 100644 --- a/tests/envs/test_envs.py +++ b/tests/envs/test_envs.py @@ -1,3 +1,5 @@ +from typing import List + import numpy as np import pytest @@ -49,15 +51,6 @@ def test_env(spec): assert ( observation.dtype == ob_space.dtype ), f"Step observation dtype: {ob.dtype}, expected: {ob_space.dtype}" - for mode in env.metadata.get("render_modes", []): - if not (mode == "human" and spec.entry_point.startswith("gym.envs.mujoco")): - env.render(mode=mode) - - # Make sure we can render the environment after close. 
- for mode in env.metadata.get("render_modes", []): - if not (mode == "human" and spec.entry_point.startswith("gym.envs.mujoco")): - - env.render(mode=mode) env.close() @@ -79,14 +72,30 @@ def test_reset_info(spec): env.close() +@pytest.mark.parametrize( + "spec", spec_list_no_mujoco_py, ids=[spec.id for spec in spec_list_no_mujoco_py] +) +def test_render_modes(spec): + env = spec.make() + + for mode in env.metadata.get("render_modes", []): + if mode != "human": + new_env = spec.make(render_mode=mode) + + new_env.reset() + new_env.step(new_env.action_space.sample()) + new_env.render() + + def test_env_render_result_is_immutable(): environs = [ - envs.make("Taxi-v3"), - envs.make("FrozenLake-v1"), + envs.make("Taxi-v3", render_mode="ansi"), + envs.make("FrozenLake-v1", render_mode="ansi"), ] for env in environs: env.reset() - output = env.render(mode="ansi") - assert isinstance(output, str) + output = env.render() + assert isinstance(output, List) + assert isinstance(output[0], str) env.close() diff --git a/tests/utils/test_env_checker.py b/tests/utils/test_env_checker.py index b50ec4c39e1..133f4a4c1ec 100644 --- a/tests/utils/test_env_checker.py +++ b/tests/utils/test_env_checker.py @@ -12,6 +12,9 @@ class ActionDictTestEnv(gym.Env): action_space = Dict({"position": Discrete(1), "velocity": Discrete(1)}) observation_space = Box(low=-1.0, high=2.0, shape=(3,), dtype=np.float32) + def __init__(self, render_mode: Optional[str] = None): + self.render_mode = render_mode + def step(self, action): observation = np.array([1.0, 1.5, 0.5]) reward = 1 @@ -22,7 +25,7 @@ def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): super().reset(seed=seed) return np.array([1.0, 1.5, 0.5]) - def render(self, mode="human"): + def render(self, mode: Optional[str] = "human"): pass diff --git a/tests/vector/test_async_vector_env.py b/tests/vector/test_async_vector_env.py index 264801df88b..9ba85c57fc6 100644 --- a/tests/vector/test_async_vector_env.py +++ 
b/tests/vector/test_async_vector_env.py @@ -105,11 +105,11 @@ def test_step_async_vector_env(shared_memory, use_single_action_space): @pytest.mark.parametrize("shared_memory", [True, False]) def test_call_async_vector_env(shared_memory): - env_fns = [make_env("CartPole-v1", i) for i in range(4)] + env_fns = [make_env("CartPole-v1", i, render_mode="rgb_array") for i in range(4)] try: env = AsyncVectorEnv(env_fns, shared_memory=shared_memory) _ = env.reset() - images = env.call("render", mode="rgb_array") + images = env.call("render") gravity = env.call("gravity") finally: env.close() @@ -117,7 +117,8 @@ def test_call_async_vector_env(shared_memory): assert isinstance(images, tuple) assert len(images) == 4 for i in range(4): - assert isinstance(images[i], np.ndarray) + assert len(images[i]) == 1 + assert isinstance(images[i][0], np.ndarray) assert isinstance(gravity, tuple) assert len(gravity) == 4 diff --git a/tests/vector/test_sync_vector_env.py b/tests/vector/test_sync_vector_env.py index a5c8d973d0c..1a25e684d56 100644 --- a/tests/vector/test_sync_vector_env.py +++ b/tests/vector/test_sync_vector_env.py @@ -105,11 +105,11 @@ def test_step_sync_vector_env(use_single_action_space): def test_call_sync_vector_env(): - env_fns = [make_env("CartPole-v1", i) for i in range(4)] + env_fns = [make_env("CartPole-v1", i, render_mode="rgb_array") for i in range(4)] try: env = SyncVectorEnv(env_fns) _ = env.reset() - images = env.call("render", mode="rgb_array") + images = env.call("render") gravity = env.call("gravity") finally: env.close() @@ -117,7 +117,8 @@ def test_call_sync_vector_env(): assert isinstance(images, tuple) assert len(images) == 4 for i in range(4): - assert isinstance(images[i], np.ndarray) + assert len(images[i]) == 1 + assert isinstance(images[i][0], np.ndarray) assert isinstance(gravity, tuple) assert len(gravity) == 4 diff --git a/tests/vector/utils.py b/tests/vector/utils.py index b500163ae5d..dfad4def867 100644 --- a/tests/vector/utils.py +++ 
b/tests/vector/utils.py @@ -107,9 +107,9 @@ def step(self, action): return observation, reward, done, {} -def make_env(env_name, seed): +def make_env(env_name, seed, **kwargs): def _make(): - env = gym.make(env_name) + env = gym.make(env_name, **kwargs) env.action_space.seed(seed) env.reset(seed=seed) return env diff --git a/tests/wrappers/test_filter_observation.py b/tests/wrappers/test_filter_observation.py index e7d5ef2b052..7dba3e0d764 100644 --- a/tests/wrappers/test_filter_observation.py +++ b/tests/wrappers/test_filter_observation.py @@ -9,7 +9,7 @@ class FakeEnvironment(gym.Env): - def __init__(self, observation_keys=("state")): + def __init__(self, render_mode=None, observation_keys=("state")): self.observation_space = spaces.Dict( { name: spaces.Box(shape=(2,), low=-1, high=1, dtype=np.float32) @@ -17,11 +17,10 @@ def __init__(self, observation_keys=("state")): } ) self.action_space = spaces.Box(shape=(1,), low=-1, high=1, dtype=np.float32) + self.render_mode = render_mode - def render(self, width=32, height=32, *args, **kwargs): - del args - del kwargs - image_shape = (height, width, 3) + def render(self, mode="human"): + image_shape = (32, 32, 3) return np.zeros(image_shape, dtype=np.uint8) def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): diff --git a/tests/wrappers/test_nested_dict.py b/tests/wrappers/test_nested_dict.py index 87899724e32..f6d5e5be234 100644 --- a/tests/wrappers/test_nested_dict.py +++ b/tests/wrappers/test_nested_dict.py @@ -10,15 +10,14 @@ class FakeEnvironment(gym.Env): - def __init__(self, observation_space): + def __init__(self, observation_space, render_mode=None): self.observation_space = observation_space self.obs_keys = self.observation_space.spaces.keys() self.action_space = Box(shape=(1,), low=-1, high=1, dtype=np.float32) + self.render_mode = render_mode - def render(self, width=32, height=32, *args, **kwargs): - del args - del kwargs - image_shape = (height, width, 3) + def render(self, 
mode="human"): + image_shape = (32, 32, 3) return np.zeros(image_shape, dtype=np.uint8) def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): diff --git a/tests/wrappers/test_pixel_observation.py b/tests/wrappers/test_pixel_observation.py index 95f094579cd..27805f62448 100644 --- a/tests/wrappers/test_pixel_observation.py +++ b/tests/wrappers/test_pixel_observation.py @@ -10,12 +10,11 @@ class FakeEnvironment(gym.Env): - def __init__(self): + def __init__(self, render_mode=None): self.action_space = spaces.Box(shape=(1,), low=-1, high=1, dtype=np.float32) + self.render_mode = render_mode - def render(self, width=32, height=32, *args, **kwargs): - del args - del kwargs + def render(self, mode="human", width=32, height=32): image_shape = (height, width, 3) return np.zeros(image_shape, dtype=np.uint8) @@ -49,7 +48,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) -class TestPixelObservationWrapper: +class TestPixelObservationWrapper(gym.Wrapper): @pytest.mark.parametrize("pixels_only", (True, False)) def test_dict_observation(self, pixels_only): pixel_key = "rgb" diff --git a/tests/wrappers/test_record_episode_statistics.py b/tests/wrappers/test_record_episode_statistics.py index 4d78fc14804..0df7de1e0fe 100644 --- a/tests/wrappers/test_record_episode_statistics.py +++ b/tests/wrappers/test_record_episode_statistics.py @@ -42,7 +42,9 @@ def test_record_episode_statistics_reset_info(): ("num_envs", "asynchronous"), [(1, False), (1, True), (4, False), (4, True)] ) def test_record_episode_statistics_with_vectorenv(num_envs, asynchronous): - envs = gym.vector.make("CartPole-v1", num_envs=num_envs, asynchronous=asynchronous) + envs = gym.vector.make( + "CartPole-v1", render_mode=None, num_envs=num_envs, asynchronous=asynchronous + ) envs = RecordEpisodeStatistics(envs) max_episode_step = ( envs.env_fns[0]().spec.max_episode_steps diff --git a/tests/wrappers/test_record_video.py b/tests/wrappers/test_record_video.py index 
3530126cc81..52c2a5106e5 100644 --- a/tests/wrappers/test_record_video.py +++ b/tests/wrappers/test_record_video.py @@ -7,7 +7,7 @@ def test_record_video_using_default_trigger(): - env = gym.make("CartPole-v1") + env = gym.make("CartPole-v1", render_mode="rgb_array") env = gym.wrappers.RecordVideo(env, "videos") env.reset() for _ in range(199): @@ -25,7 +25,7 @@ def test_record_video_using_default_trigger(): def test_record_video_reset_return_info(): - env = gym.make("CartPole-v1") + env = gym.make("CartPole-v1", render_mode="rgb_array") env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0) ob_space = env.observation_space obs, info = env.reset(return_info=True) @@ -35,7 +35,7 @@ def test_record_video_reset_return_info(): assert ob_space.contains(obs) assert isinstance(info, dict) - env = gym.make("CartPole-v1") + env = gym.make("CartPole-v1", render_mode="rgb_array") env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0) ob_space = env.observation_space obs = env.reset(return_info=False) @@ -44,7 +44,7 @@ def test_record_video_reset_return_info(): shutil.rmtree("videos") assert ob_space.contains(obs) - env = gym.make("CartPole-v1") + env = gym.make("CartPole-v1", render_mode="rgb_array") env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0) ob_space = env.observation_space obs = env.reset() @@ -55,7 +55,7 @@ def test_record_video_reset_return_info(): def test_record_video_step_trigger(): - env = gym.make("CartPole-v1") + env = gym.make("CartPole-v1", render_mode="rgb_array") env._max_episode_steps = 20 env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0) env.reset() @@ -71,9 +71,9 @@ def test_record_video_step_trigger(): shutil.rmtree("videos") -def make_env(gym_id, seed): +def make_env(gym_id, seed, **kwargs): def thunk(): - env = gym.make(gym_id) + env = gym.make(gym_id, **kwargs) env._max_episode_steps = 20 if seed == 1: env = 
gym.wrappers.RecordVideo( @@ -85,7 +85,9 @@ def thunk(): def test_record_video_within_vector(): - envs = gym.vector.SyncVectorEnv([make_env("CartPole-v1", 1 + i) for i in range(2)]) + envs = gym.vector.SyncVectorEnv( + [make_env("CartPole-v1", 1 + i, render_mode="rgb_array") for i in range(2)] + ) envs = gym.wrappers.RecordEpisodeStatistics(envs) envs.reset() for i in range(199): diff --git a/tests/wrappers/test_video_recorder.py b/tests/wrappers/test_video_recorder.py index 2c6a6597dbd..090236a43d3 100644 --- a/tests/wrappers/test_video_recorder.py +++ b/tests/wrappers/test_video_recorder.py @@ -9,21 +9,27 @@ class BrokenRecordableEnv: - metadata = {"render_modes": [None, "rgb_array"]} + metadata = {"render_modes": ["rgb_array"]} - def render(self, mode=None): + def __init__(self, render_mode="rgb_array"): + self.render_mode = render_mode + + def render(self, mode="human"): pass class UnrecordableEnv: metadata = {"render_modes": [None]} - def render(self, mode=None): + def __init__(self, render_mode=None): + self.render_mode = render_mode + + def render(self, mode="human"): pass def test_record_simple(): - env = gym.make("CartPole-v1") + env = gym.make("CartPole-v1", render_mode="rgb_array") rec = VideoRecorder(env) env.reset() rec.capture_frame() @@ -43,7 +49,7 @@ def test_record_simple(): def test_autoclose(): def record(): - env = gym.make("CartPole-v1") + env = gym.make("CartPole-v1", render_mode="rgb_array") rec = VideoRecorder(env) env.reset() rec.capture_frame() @@ -96,7 +102,7 @@ def test_record_breaking_render_method(): def test_text_envs(): - env = gym.make("FrozenLake-v1") + env = gym.make("FrozenLake-v1", render_mode="rgb_array") video = VideoRecorder(env) try: env.reset()