envs.acrobot_wrapper

 1import gymnasium as gym
 2import numpy as np
 3from bettermdptools.envs.acrobot_model import DiscretizedAcrobot
 4
 5class CustomTransformObservation(gym.ObservationWrapper):
 6  def __init__(self, env, func, observation_space):
 7      """
 8      Helper class that modifies the observation space. The v26 gymnasium TransformObservation wrapper does not
 9      accept an observation_space parameter, which is needed in order to match the lambda conversion (tuple->int).
10      Instead, we subclass gym.ObservationWrapper (parent class of gym.TransformObservation)
11      to set both the conversion function and new observation space.
12
13      Parameters
14      ----------------------------
15      env {gymnasium.Env}:
16          Base environment to be wrapped
17
18      func {lambda}:
19          Function that converts the observation
20
21      observation_space {gymnasium.spaces.Space}:
22          New observation space
23      """
24      super().__init__(env)
25      if observation_space is not None:
26          self.observation_space = observation_space
27      self.func = func
28
29  def observation(self, observation):
30      """
31      Applies a function to the observation received from the environment's step function,
32      which is passed back to the user.
33
34      Parameters
35      ----------------------------
36      observation {Tuple}:
37          Base environment observation tuple
38
39      Returns
40      ----------------------------
41      func(observation) {int}:
42          The converted observation (int).
43      """
44      return self.func(observation)
45
class AcrobotWrapper(gym.Wrapper):
    def __init__(self,
                 env,
                 angular_resolution_rad=0.01,
                 angular_vel_resolution_rad_per_sec=0.05,
                 angle_bins=None,
                 velocity_bins=None,
                 precomputed_P=None,
                 timestep_sec=0.1):
        """
        Acrobot wrapper that modifies the observation space and creates a transition/reward matrix P.

        A DiscretizedAcrobot model is built from the keyword arguments below. Its P and
        transform_obs are stored on the wrapper, and env is wrapped so that observations
        pass through transform_obs and the observation space becomes Discrete(n_states).

        Parameters
        ----------------------------
        env {gymnasium.Env}: Base environment
        angular_resolution_rad {float}, default = 0.01:
            Forwarded to DiscretizedAcrobot. Going by the name, the angle
            discretization step in radians (confirm against the model).
        angular_vel_resolution_rad_per_sec {float}, default = 0.05:
            Forwarded to DiscretizedAcrobot. Going by the name, the angular
            velocity discretization step in rad/s (confirm against the model).
        angle_bins, default = None:
            Forwarded to DiscretizedAcrobot; presumably an explicit angle bin
            setting that replaces the resolution (confirm against the model).
        velocity_bins, default = None:
            Forwarded to DiscretizedAcrobot; presumably an explicit velocity bin
            setting that replaces the resolution (confirm against the model).
        precomputed_P, default = None:
            Forwarded to DiscretizedAcrobot; presumably a previously computed P
            to reuse instead of rebuilding it (confirm against the model).
        timestep_sec {float}, default = 0.1:
            Forwarded to DiscretizedAcrobot; going by the name, the simulation
            timestep in seconds (confirm against the model).
        """
        acro = DiscretizedAcrobot(angular_resolution_rad=angular_resolution_rad,
                                  angular_vel_resolution_rad_per_sec=angular_vel_resolution_rad_per_sec,
                                  angle_bins=angle_bins,
                                  velocity_bins=velocity_bins,
                                  precomputed_P=precomputed_P,
                                  timestep_sec=timestep_sec)
        self._P = acro.P
        self._transform_obs = acro.transform_obs
        # Wrap env so observations are converted and the space matches the discrete state count.
        env = CustomTransformObservation(env, self._transform_obs, gym.spaces.Discrete(acro.n_states))
        super().__init__(env)

    @property
    def P(self):
        """
        Transition/reward model taken from the DiscretizedAcrobot built in __init__.

        Returns
        ----------------------------
        _P {dict}
        """
        return self._P

    @property
    def transform_obs(self):
        """
        Observation conversion function taken from the DiscretizedAcrobot built in __init__.

        Returns
        ----------------------------
        _transform_obs {lambda}
        """
        return self._transform_obs
class CustomTransformObservation(gymnasium.core.Wrapper[~WrapperObsType, ~ActType]):
 6class CustomTransformObservation(gym.ObservationWrapper):
 7  def __init__(self, env, func, observation_space):
 8      """
 9      Helper class that modifies the observation space. The v26 gymnasium TransformObservation wrapper does not
10      accept an observation_space parameter, which is needed in order to match the lambda conversion (tuple->int).
11      Instead, we subclass gym.ObservationWrapper (parent class of gym.TransformObservation)
12      to set both the conversion function and new observation space.
13
14      Parameters
15      ----------------------------
16      env {gymnasium.Env}:
17          Base environment to be wrapped
18
19      func {lambda}:
20          Function that converts the observation
21
22      observation_space {gymnasium.spaces.Space}:
23          New observation space
24      """
25      super().__init__(env)
26      if observation_space is not None:
27          self.observation_space = observation_space
28      self.func = func
29
30  def observation(self, observation):
31      """
32      Applies a function to the observation received from the environment's step function,
33      which is passed back to the user.
34
35      Parameters
36      ----------------------------
37      observation {Tuple}:
38          Base environment observation tuple
39
40      Returns
41      ----------------------------
42      func(observation) {int}:
43          The converted observation (int).
44      """
45      return self.func(observation)

Superclass of wrappers that can modify observations using observation() for reset() and step().

If you would like to apply a function to only the observation before passing it to the learning code, you can simply inherit from ObservationWrapper and overwrite the method observation() to implement that transformation. The transformation defined in that method must be reflected by the env observation space. Otherwise, you need to specify the new observation space of the wrapper by setting self.observation_space in the __init__() method of your wrapper.

Among others, Gymnasium provides the observation wrapper TimeAwareObservation, which adds information about the index of the timestep to the observation.

CustomTransformObservation(env, func, observation_space)
 7  def __init__(self, env, func, observation_space):
 8      """
 9      Helper class that modifies the observation space. The v26 gymnasium TransformObservation wrapper does not
10      accept an observation_space parameter, which is needed in order to match the lambda conversion (tuple->int).
11      Instead, we subclass gym.ObservationWrapper (parent class of gym.TransformObservation)
12      to set both the conversion function and new observation space.
13
14      Parameters
15      ----------------------------
16      env {gymnasium.Env}:
17          Base environment to be wrapped
18
19      func {lambda}:
20          Function that converts the observation
21
22      observation_space {gymnasium.spaces.Space}:
23          New observation space
24      """
25      super().__init__(env)
26      if observation_space is not None:
27          self.observation_space = observation_space
28      self.func = func

Helper class that modifies the observation space. The v26 gymnasium TransformObservation wrapper does not accept an observation_space parameter, which is needed in order to match the lambda conversion (tuple->int). Instead, we subclass gym.ObservationWrapper (parent class of gym.TransformObservation) to set both the conversion function and new observation space.

Parameters
  • env {gymnasium.Env}: Base environment to be wrapped
  • func {lambda}: Function that converts the observation
  • observation_space {gymnasium.spaces.Space}: New observation space
func
def observation(self, observation):
30  def observation(self, observation):
31      """
32      Applies a function to the observation received from the environment's step function,
33      which is passed back to the user.
34
35      Parameters
36      ----------------------------
37      observation {Tuple}:
38          Base environment observation tuple
39
40      Returns
41      ----------------------------
42      func(observation) {int}:
43          The converted observation (int).
44      """
45      return self.func(observation)

Applies a function to the observation received from the environment's step function, which is passed back to the user.

Parameters
  • observation {Tuple}: Base environment observation tuple
Returns
  • func(observation) {int}: The converted observation (int).
class AcrobotWrapper(gymnasium.core.Env[~WrapperObsType, ~WrapperActType]):
class AcrobotWrapper(gym.Wrapper):
    def __init__(self,
                 env,
                 angular_resolution_rad=0.01,
                 angular_vel_resolution_rad_per_sec=0.05,
                 angle_bins=None,
                 velocity_bins=None,
                 precomputed_P=None,
                 timestep_sec=0.1):
        """
        Acrobot wrapper that modifies the observation space and creates a transition/reward matrix P.

        A DiscretizedAcrobot model is built from the keyword arguments below. Its P and
        transform_obs are stored on the wrapper, and env is wrapped so that observations
        pass through transform_obs and the observation space becomes Discrete(n_states).

        Parameters
        ----------------------------
        env {gymnasium.Env}: Base environment
        angular_resolution_rad {float}, default = 0.01:
            Forwarded to DiscretizedAcrobot. Going by the name, the angle
            discretization step in radians (confirm against the model).
        angular_vel_resolution_rad_per_sec {float}, default = 0.05:
            Forwarded to DiscretizedAcrobot. Going by the name, the angular
            velocity discretization step in rad/s (confirm against the model).
        angle_bins, default = None:
            Forwarded to DiscretizedAcrobot; presumably an explicit angle bin
            setting that replaces the resolution (confirm against the model).
        velocity_bins, default = None:
            Forwarded to DiscretizedAcrobot; presumably an explicit velocity bin
            setting that replaces the resolution (confirm against the model).
        precomputed_P, default = None:
            Forwarded to DiscretizedAcrobot; presumably a previously computed P
            to reuse instead of rebuilding it (confirm against the model).
        timestep_sec {float}, default = 0.1:
            Forwarded to DiscretizedAcrobot; going by the name, the simulation
            timestep in seconds (confirm against the model).
        """
        acro = DiscretizedAcrobot(angular_resolution_rad=angular_resolution_rad,
                                  angular_vel_resolution_rad_per_sec=angular_vel_resolution_rad_per_sec,
                                  angle_bins=angle_bins,
                                  velocity_bins=velocity_bins,
                                  precomputed_P=precomputed_P,
                                  timestep_sec=timestep_sec)
        self._P = acro.P
        self._transform_obs = acro.transform_obs
        # Wrap env so observations are converted and the space matches the discrete state count.
        env = CustomTransformObservation(env, self._transform_obs, gym.spaces.Discrete(acro.n_states))
        super().__init__(env)

    @property
    def P(self):
        """
        Transition/reward model taken from the DiscretizedAcrobot built in __init__.

        Returns
        ----------------------------
        _P {dict}
        """
        return self._P

    @property
    def transform_obs(self):
        """
        Observation conversion function taken from the DiscretizedAcrobot built in __init__.

        Returns
        ----------------------------
        _transform_obs {lambda}
        """
        return self._transform_obs

Wraps a gymnasium.Env to allow a modular transformation of the step() and reset() methods.

This class is the base class of all wrappers to change the behavior of the underlying environment. Wrappers that inherit from this class can modify the action_space, observation_space, reward_range and metadata attributes, without changing the underlying environment's attributes. Moreover, the behavior of the step() and reset() methods can be changed by these wrappers.

Some attributes (spec, render_mode, np_random) will point back to the wrapper's environment (i.e. to the corresponding attributes of env).

Note: If you inherit from Wrapper, don't forget to call super().__init__(env)

AcrobotWrapper( env, angular_resolution_rad=0.01, angular_vel_resolution_rad_per_sec=0.05, angle_bins=None, velocity_bins=None, precomputed_P=None, timestep_sec=0.1)
48  def __init__(self,
49                env,
50                angular_resolution_rad = 0.01,
51                angular_vel_resolution_rad_per_sec = 0.05,
52                angle_bins = None,
53                velocity_bins = None,
54                precomputed_P = None,
55                timestep_sec = 0.1):
56      """
57      Cartpole wrapper that modifies the observation space and creates a transition/reward matrix P.
58
59      Parameters
60      ----------------------------
61      env {gymnasium.Env}: Base environment
62      position_bins (int): Number of discrete bins for the cart's position.
63      velocity_bins (int): Number of discrete bins for the cart's velocity.
64      angular_velocity_bins (int): Number of discrete bins for the pole's angular velocity.
65      angular_center_resolution (float): The resolution of angle bins near the center (around zero).
66      angular_outer_resolution (float): The resolution of angle bins away from the center.
67      """
68      acro = DiscretizedAcrobot(angular_resolution_rad=angular_resolution_rad,
69                                angular_vel_resolution_rad_per_sec = angular_vel_resolution_rad_per_sec,
70                                angle_bins=angle_bins,
71                                velocity_bins=velocity_bins,
72                                precomputed_P = precomputed_P,
73                                timestep_sec=timestep_sec)
74      self._P = acro.P
75      self._transform_obs = acro.transform_obs
76      env = CustomTransformObservation(env, self._transform_obs, gym.spaces.Discrete(acro.n_states))
77      super().__init__(env)

Acrobot wrapper that modifies the observation space and creates a transition/reward matrix P.

Parameters
  • env {gymnasium.Env}: Base environment

  • angular_resolution_rad (float), default 0.01: Forwarded to DiscretizedAcrobot.

  • angular_vel_resolution_rad_per_sec (float), default 0.05: Forwarded to DiscretizedAcrobot.

  • angle_bins, default None: Forwarded to DiscretizedAcrobot.

  • velocity_bins, default None: Forwarded to DiscretizedAcrobot.

  • precomputed_P, default None: Forwarded to DiscretizedAcrobot.

  • timestep_sec (float), default 0.1: Forwarded to DiscretizedAcrobot.

P
79  @property
80  def P(self):
81      """
82      Returns
83      ----------------------------
84      _P {dict}
85      """
86      return self._P
Returns
  • _P {dict}
transform_obs
88  @property
89  def transform_obs(self):
90      """
91      Returns
92      ----------------------------
93      _transform_obs {lambda}
94      """
95      return self._transform_obs
Returns
  • _transform_obs {lambda}