bettermdptools.envs.acrobot_wrapper

  1import gymnasium as gym
  2
  3from bettermdptools.envs.acrobot_model import DiscretizedAcrobot
  4
  5
  6class CustomTransformObservation(gym.ObservationWrapper):
  7    def __init__(self, env, func, observation_space):
  8        """
  9        Helper class that modifies the observation space. The v26 gymnasium TransformObservation wrapper does not
 10        accept an observation_space parameter, which is needed in order to match the lambda conversion (tuple->int).
 11        Instead, we subclass gym.ObservationWrapper (parent class of gym.TransformObservation)
 12        to set both the conversion function and new observation space.
 13
 14        Parameters
 15        ----------
 16        env : gymnasium.Env
 17            Base environment to be wrapped
 18        func : lambda
 19            Function that converts the observation
 20        observation_space : gymnasium.spaces.Space
 21            New observation space
 22        """
 23        super().__init__(env)
 24        if observation_space is not None:
 25            self.observation_space = observation_space
 26        self.func = func
 27
 28    def observation(self, observation):
 29        """
 30        Applies a function to the observation received from the environment's step function,
 31        which is passed back to the user.
 32
 33        Parameters
 34        ----------
 35        observation : Tuple
 36            Base environment observation tuple
 37
 38        Returns
 39        -------
 40        int
 41            The converted observation (int).
 42        """
 43        return self.func(observation)
 44
 45
 46class AcrobotWrapper(gym.Wrapper):
 47    def __init__(
 48        self,
 49        env,
 50        angular_resolution_rad=0.01,
 51        angular_vel_resolution_rad_per_sec=0.05,
 52        angle_bins=None,
 53        velocity_bins=None,
 54        precomputed_P=None,
 55    ):
 56        """
 57        Cartpole wrapper that modifies the observation space and creates a transition/reward matrix P.
 58
 59        Parameters
 60        ----------
 61        env : gymnasium.Env
 62            Base environment
 63        angular_resolution_rad : float
 64            The resolution of angle bins in radians.
 65        angular_vel_resolution_rad_per_sec : float
 66            The resolution of angular velocity bins in radians per second.
 67        angle_bins : int, optional
 68            Number of discrete bins for the angles.
 69        velocity_bins : int, optional
 70            Number of discrete bins for the velocities.
 71        precomputed_P : dict, optional
 72            Precomputed transition probability matrix.
 73        """
 74        acro = DiscretizedAcrobot(
 75            angular_resolution_rad=angular_resolution_rad,
 76            angular_vel_resolution_rad_per_sec=angular_vel_resolution_rad_per_sec,
 77            angle_bins=angle_bins,
 78            velocity_bins=velocity_bins,
 79            precomputed_P=precomputed_P,
 80        )
 81        self._P = acro.P
 82        self._transform_obs = acro.transform_obs
 83        env = CustomTransformObservation(
 84            env, self._transform_obs, gym.spaces.Discrete(acro.n_states)
 85        )
 86        super().__init__(env)
 87
 88    @property
 89    def P(self):
 90        """
 91        Returns
 92        -------
 93        dict
 94        """
 95        return self._P
 96
 97    @property
 98    def transform_obs(self):
 99        """
100        Returns
101        -------
102        lambda
103        """
104        return self._transform_obs
class CustomTransformObservation(gymnasium.core.Wrapper[~WrapperObsType, ~ActType]):
 7class CustomTransformObservation(gym.ObservationWrapper):
 8    def __init__(self, env, func, observation_space):
 9        """
10        Helper class that modifies the observation space. The v26 gymnasium TransformObservation wrapper does not
11        accept an observation_space parameter, which is needed in order to match the lambda conversion (tuple->int).
12        Instead, we subclass gym.ObservationWrapper (parent class of gym.TransformObservation)
13        to set both the conversion function and new observation space.
14
15        Parameters
16        ----------
17        env : gymnasium.Env
18            Base environment to be wrapped
19        func : lambda
20            Function that converts the observation
21        observation_space : gymnasium.spaces.Space
22            New observation space
23        """
24        super().__init__(env)
25        if observation_space is not None:
26            self.observation_space = observation_space
27        self.func = func
28
29    def observation(self, observation):
30        """
31        Applies a function to the observation received from the environment's step function,
32        which is passed back to the user.
33
34        Parameters
35        ----------
36        observation : Tuple
37            Base environment observation tuple
38
39        Returns
40        -------
41        int
42            The converted observation (int).
43        """
44        return self.func(observation)

Superclass of wrappers that can modify observations using observation() for reset() and step().

If you would like to apply a function to only the observation before passing it to the learning code, you can simply inherit from ObservationWrapper and overwrite the method observation() to implement that transformation. The transformation defined in that method must be reflected by the env observation space. Otherwise, you need to specify the new observation space of the wrapper by setting self.observation_space in the __init__() method of your wrapper.

Among others, Gymnasium provides the observation wrapper TimeAwareObservation, which adds information about the index of the timestep to the observation.

CustomTransformObservation(env, func, observation_space)
 8    def __init__(self, env, func, observation_space):
 9        """
10        Helper class that modifies the observation space. The v26 gymnasium TransformObservation wrapper does not
11        accept an observation_space parameter, which is needed in order to match the lambda conversion (tuple->int).
12        Instead, we subclass gym.ObservationWrapper (parent class of gym.TransformObservation)
13        to set both the conversion function and new observation space.
14
15        Parameters
16        ----------
17        env : gymnasium.Env
18            Base environment to be wrapped
19        func : lambda
20            Function that converts the observation
21        observation_space : gymnasium.spaces.Space
22            New observation space
23        """
24        super().__init__(env)
25        if observation_space is not None:
26            self.observation_space = observation_space
27        self.func = func

Helper class that modifies the observation space. The v26 gymnasium TransformObservation wrapper does not accept an observation_space parameter, which is needed in order to match the lambda conversion (tuple->int). Instead, we subclass gym.ObservationWrapper (parent class of gym.TransformObservation) to set both the conversion function and new observation space.

Parameters
  • env (gymnasium.Env): Base environment to be wrapped
  • func (lambda): Function that converts the observation
  • observation_space (gymnasium.spaces.Space): New observation space
func
def observation(self, observation):
29    def observation(self, observation):
30        """
31        Applies a function to the observation received from the environment's step function,
32        which is passed back to the user.
33
34        Parameters
35        ----------
36        observation : Tuple
37            Base environment observation tuple
38
39        Returns
40        -------
41        int
42            The converted observation (int).
43        """
44        return self.func(observation)

Applies the conversion function to each observation returned by the environment's reset() and step() methods before it is passed back to the user.

Parameters
  • observation (Tuple): Base environment observation tuple
Returns
  • int: The converted observation (int).
class AcrobotWrapper(gymnasium.core.Env[~WrapperObsType, ~WrapperActType]):
 47class AcrobotWrapper(gym.Wrapper):
 48    def __init__(
 49        self,
 50        env,
 51        angular_resolution_rad=0.01,
 52        angular_vel_resolution_rad_per_sec=0.05,
 53        angle_bins=None,
 54        velocity_bins=None,
 55        precomputed_P=None,
 56    ):
 57        """
 58        Cartpole wrapper that modifies the observation space and creates a transition/reward matrix P.
 59
 60        Parameters
 61        ----------
 62        env : gymnasium.Env
 63            Base environment
 64        angular_resolution_rad : float
 65            The resolution of angle bins in radians.
 66        angular_vel_resolution_rad_per_sec : float
 67            The resolution of angular velocity bins in radians per second.
 68        angle_bins : int, optional
 69            Number of discrete bins for the angles.
 70        velocity_bins : int, optional
 71            Number of discrete bins for the velocities.
 72        precomputed_P : dict, optional
 73            Precomputed transition probability matrix.
 74        """
 75        acro = DiscretizedAcrobot(
 76            angular_resolution_rad=angular_resolution_rad,
 77            angular_vel_resolution_rad_per_sec=angular_vel_resolution_rad_per_sec,
 78            angle_bins=angle_bins,
 79            velocity_bins=velocity_bins,
 80            precomputed_P=precomputed_P,
 81        )
 82        self._P = acro.P
 83        self._transform_obs = acro.transform_obs
 84        env = CustomTransformObservation(
 85            env, self._transform_obs, gym.spaces.Discrete(acro.n_states)
 86        )
 87        super().__init__(env)
 88
 89    @property
 90    def P(self):
 91        """
 92        Returns
 93        -------
 94        dict
 95        """
 96        return self._P
 97
 98    @property
 99    def transform_obs(self):
100        """
101        Returns
102        -------
103        lambda
104        """
105        return self._transform_obs

Wraps a gymnasium.Env to allow a modular transformation of the step() and reset() methods.

This class is the base class of all wrappers to change the behavior of the underlying environment. Wrappers that inherit from this class can modify the action_space, observation_space, reward_range and metadata attributes, without changing the underlying environment's attributes. Moreover, the behavior of the step() and reset() methods can be changed by these wrappers.

Some attributes (spec, render_mode, np_random) will point back to the wrapper's environment (i.e. to the corresponding attributes of env).

Note: If you inherit from Wrapper, don't forget to call super().__init__(env)

AcrobotWrapper( env, angular_resolution_rad=0.01, angular_vel_resolution_rad_per_sec=0.05, angle_bins=None, velocity_bins=None, precomputed_P=None)
48    def __init__(
49        self,
50        env,
51        angular_resolution_rad=0.01,
52        angular_vel_resolution_rad_per_sec=0.05,
53        angle_bins=None,
54        velocity_bins=None,
55        precomputed_P=None,
56    ):
57        """
58        Cartpole wrapper that modifies the observation space and creates a transition/reward matrix P.
59
60        Parameters
61        ----------
62        env : gymnasium.Env
63            Base environment
64        angular_resolution_rad : float
65            The resolution of angle bins in radians.
66        angular_vel_resolution_rad_per_sec : float
67            The resolution of angular velocity bins in radians per second.
68        angle_bins : int, optional
69            Number of discrete bins for the angles.
70        velocity_bins : int, optional
71            Number of discrete bins for the velocities.
72        precomputed_P : dict, optional
73            Precomputed transition probability matrix.
74        """
75        acro = DiscretizedAcrobot(
76            angular_resolution_rad=angular_resolution_rad,
77            angular_vel_resolution_rad_per_sec=angular_vel_resolution_rad_per_sec,
78            angle_bins=angle_bins,
79            velocity_bins=velocity_bins,
80            precomputed_P=precomputed_P,
81        )
82        self._P = acro.P
83        self._transform_obs = acro.transform_obs
84        env = CustomTransformObservation(
85            env, self._transform_obs, gym.spaces.Discrete(acro.n_states)
86        )
87        super().__init__(env)

Acrobot wrapper that modifies the observation space and creates a transition/reward matrix P.

Parameters
  • env (gymnasium.Env): Base environment
  • angular_resolution_rad (float): The resolution of angle bins in radians.
  • angular_vel_resolution_rad_per_sec (float): The resolution of angular velocity bins in radians per second.
  • angle_bins (int, optional): Number of discrete bins for the angles.
  • velocity_bins (int, optional): Number of discrete bins for the velocities.
  • precomputed_P (dict, optional): Precomputed transition probability matrix.
P
89    @property
90    def P(self):
91        """
92        Returns
93        -------
94        dict
95        """
96        return self._P
Returns
  • dict
transform_obs
 98    @property
 99    def transform_obs(self):
100        """
101        Returns
102        -------
103        lambda
104        """
105        return self._transform_obs
Returns
  • lambda