envs.blackjack_wrapper

Author: John Mansfield. License: BSD 3-Clause.

Transitions and rewards matrix from: https://github.com/rhalbersma/gym-blackjack-v1

Observations:

There are 29 * 10 = 290 discrete observable states:

29 player hands: H4-H21, S12-S21, BJ (0-28)

H4 = 0

H5 = 1

H6 = 2

H7 = 3

H8 = 4

H9 = 5

H10 = 6

H11 = 7

H12 = 8

H13 = 9

H14 = 10

H15 = 11

H16 = 12

H17 = 13

H18 = 14

H19 = 15

H20 = 16

H21 = 17

S12 = 18

S13 = 19

S14 = 20

S15 = 21

S16 = 22

S17 = 23

S18 = 24

S19 = 25

S20 = 26

S21 = 27

BJ = 28

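Concatenated with 10 dealer cards: 2-9, T, A (0-9). The state index is the player-hand index followed by the dealer-card digit; for example, H12 (8) against a dealer 5 (3) is state 83.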

_2 = 0

_3 = 1

_4 = 2

_5 = 3

_6 = 4

_7 = 5

_8 = 6

_9 = 7

_T = 8 # 10, J, Q, K are all denoted as T

_A = 9

  1"""
  2Author: John Mansfield
  3BSD 3-Clause License
  4
  5# Transitions and rewards matrix from: https://github.com/rhalbersma/gym-blackjack-v1
  6#    Observations:
  7#   There are 29 * 10 = 290 discrete observable states:
  8#    29 player hands: H4-H21, S12-S21, BJ (0-28)
  9#     H4   =  0
 10#     H5   =  1
 11#     H6   =  2
 12#     H7   =  3
 13#     H8   =  4
 14#     H9   =  5
 15#     H10  =  6
 16#     H11  =  7
 17#     H12  =  8
 18#     H13  =  9
 19#     H14  = 10
 20#     H15  = 11
 21#     H16  = 12
 22#     H17  = 13
 23#     H18  = 14
 24#     H19  = 15
 25#     H20  = 16
 26#     H21  = 17
 27#     S12  = 18
 28#     S13  = 19
 29#     S14  = 20
 30#     S15  = 21
 31#     S16  = 22
 32#     S17  = 23
 33#     S18  = 24
 34#     S19  = 25
 35#     S20  = 26
 36#     S21  = 27
 37#     BJ   = 28
 38#    Concatenated with 10 dealer cards: 2-9, T, A (0-9)
 39#     _2 = 0
 40#     _3 = 1
 41#     _4 = 2
 42#     _5 = 3
 43#     _6 = 4
 44#     _7 = 5
 45#     _8 = 6
 46#     _9 = 7
 47#     _T = 8 # 10, J, Q, K are all denoted as T
 48#     _A = 9
 49"""
 50
 51import gymnasium as gym
 52import os
 53import pickle
 54
 55class CustomTransformObservation(gym.ObservationWrapper):
 56    def __init__(self, env, func, observation_space):
 57        """
 58        Helper class that modifies the observation space. The v26 gymnasium TransformObservation wrapper does not
 59        accept an observation_space parameter, which is needed in order to match the lambda conversion (tuple->int).
 60        Instead, we subclass gym.ObservationWrapper (parent class of gym.TransformObservation)
 61        to set both the conversion function and new observation space.
 62
 63        Parameters
 64        ----------------------------
 65        env {gymnasium.Env}:
 66            Blackjack base environment to be wrapped
 67
 68        func {lambda}:
 69            Function that converts the observation
 70
 71        observation_space {gymnasium.spaces.Space}:
 72            New observation space
 73        """
 74        super().__init__(env)
 75        if observation_space is not None:
 76            self.observation_space = observation_space
 77        self.func = func
 78
 79    def observation(self, observation):
 80        """
 81        Applies a function to the observation received from the environment's step function,
 82        which is passed back to the user.
 83
 84        Parameters
 85        ----------------------------
 86        observation {Tuple}:
 87            Blackjack base environment observation tuple
 88
 89        Returns
 90        ----------------------------
 91        func(observation) {int}:
 92            The converted observation (290 discrete observable states)
 93        """
 94        return self.func(observation)
 95
 96class BlackjackWrapper(gym.Wrapper):
 97    def __init__(self, env):
 98        """
 99        Blackjack wrapper that modifies the observation space and creates a transition/reward matrix P.
100
101        Parameters
102        ----------------------------
103        env {gymnasium.Env}:
104            Blackjack base environment
105
106        Explanation of _transform_obs lambda:
107        Lambda function assigned to the variable `self._convert_state_obs` takes parameter, `state` and
108        converts the input into a compact single integer value by concatenating player hand with dealer card.
109        See comments above for further information.
110
111        """
112        self._transform_obs = lambda obs: (
113            int(f"{28}{(obs[1] - 2) % 10}") if (obs[0] == 21 and obs[2])
114            else int(f"{27}{(obs[1] - 2) % 10}") if (obs[0] == 21 and not obs[2])
115            else int(f"{obs[0] + 6}{(obs[1] - 2) % 10}") if obs[2]
116            else int(f"{obs[0] - 4}{(obs[1] - 2) % 10}"))
117        env = CustomTransformObservation(env, self._transform_obs, gym.spaces.Discrete(290))
118        super().__init__(env)
119        current_dir = os.path.dirname(__file__)
120        file_name = 'blackjack-envP.pickle'
121        f = os.path.join(current_dir, file_name)
122        with open(f, "rb") as f:
123            self._P = pickle.load(f)
124
125    @property
126    def P(self):
127        """
128        Returns
129        ----------------------------
130        _P {dict}
131        """
132        return self._P
133
134    @property
135    def transform_obs(self):
136        """
137        Returns
138        ----------------------------
139        _transform_obs {lambda}
140        """
141        return self._transform_obs
class CustomTransformObservation(gymnasium.core.Wrapper[~WrapperObsType, ~ActType]):
class CustomTransformObservation(gym.ObservationWrapper):
    def __init__(self, env, func, observation_space):
        """
        Observation wrapper that pairs a conversion function with an explicit observation space.
        The v26 gymnasium TransformObservation wrapper does not accept an observation_space
        parameter, which is needed so that the advertised space matches the lambda conversion
        (tuple->int). Subclassing gym.ObservationWrapper (parent class of
        gym.TransformObservation) lets both the function and the space be set here.

        Parameters
        ----------------------------
        env {gymnasium.Env}:
            Blackjack base environment to be wrapped

        func {lambda}:
            Function that converts the observation

        observation_space {gymnasium.spaces.Space}:
            New observation space; when None, no override is stored on this wrapper
        """
        super().__init__(env)
        self.func = func
        # Only override the space when the caller actually supplied one.
        if observation_space is not None:
            self.observation_space = observation_space

    def observation(self, observation):
        """
        Converts an observation coming from the wrapped environment with the stored function
        before it is passed back to the user.

        Parameters
        ----------------------------
        observation {Tuple}:
            Blackjack base environment observation tuple

        Returns
        ----------------------------
        converted {int}:
            The converted observation (290 discrete observable states)
        """
        converted = self.func(observation)
        return converted

Superclass of wrappers that can modify observations using observation() for reset() and step().

If you would like to apply a function to only the observation before passing it to the learning code, you can simply inherit from ObservationWrapper and overwrite the method observation() to implement that transformation. The transformation defined in that method must be reflected by the env observation space. Otherwise, you need to specify the new observation space of the wrapper by setting self.observation_space in the __init__() method of your wrapper.

Among others, Gymnasium provides the observation wrapper TimeAwareObservation, which adds information about the index of the timestep to the observation.

CustomTransformObservation(env, func, observation_space)
57    def __init__(self, env, func, observation_space):
58        """
59        Helper class that modifies the observation space. The v26 gymnasium TransformObservation wrapper does not
60        accept an observation_space parameter, which is needed in order to match the lambda conversion (tuple->int).
61        Instead, we subclass gym.ObservationWrapper (parent class of gym.TransformObservation)
62        to set both the conversion function and new observation space.
63
64        Parameters
65        ----------------------------
66        env {gymnasium.Env}:
67            Blackjack base environment to be wrapped
68
69        func {lambda}:
70            Function that converts the observation
71
72        observation_space {gymnasium.spaces.Space}:
73            New observation space
74        """
75        super().__init__(env)
76        if observation_space is not None:
77            self.observation_space = observation_space
78        self.func = func

Helper class that modifies the observation space. The v26 gymnasium TransformObservation wrapper does not accept an observation_space parameter, which is needed in order to match the lambda conversion (tuple->int). Instead, we subclass gym.ObservationWrapper (parent class of gym.TransformObservation) to set both the conversion function and new observation space.

Parameters
  • env {gymnasium.Env}: Blackjack base environment to be wrapped
  • func {lambda}: Function that converts the observation
  • observation_space {gymnasium.spaces.Space}: New observation space
func
def observation(self, observation):
80    def observation(self, observation):
81        """
82        Applies a function to the observation received from the environment's step function,
83        which is passed back to the user.
84
85        Parameters
86        ----------------------------
87        observation {Tuple}:
88            Blackjack base environment observation tuple
89
90        Returns
91        ----------------------------
92        func(observation) {int}:
93            The converted observation (290 discrete observable states)
94        """
95        return self.func(observation)

Applies a function to the observation received from the environment's step function, which is passed back to the user.

Parameters
  • observation {Tuple}: Blackjack base environment observation tuple
Returns
  • func(observation) {int}: The converted observation (290 discrete observable states)
class BlackjackWrapper(gymnasium.core.Env[~WrapperObsType, ~WrapperActType]):
 97class BlackjackWrapper(gym.Wrapper):
 98    def __init__(self, env):
 99        """
100        Blackjack wrapper that modifies the observation space and creates a transition/reward matrix P.
101
102        Parameters
103        ----------------------------
104        env {gymnasium.Env}:
105            Blackjack base environment
106
107        Explanation of _transform_obs lambda:
108        Lambda function assigned to the variable `self._convert_state_obs` takes parameter, `state` and
109        converts the input into a compact single integer value by concatenating player hand with dealer card.
110        See comments above for further information.
111
112        """
113        self._transform_obs = lambda obs: (
114            int(f"{28}{(obs[1] - 2) % 10}") if (obs[0] == 21 and obs[2])
115            else int(f"{27}{(obs[1] - 2) % 10}") if (obs[0] == 21 and not obs[2])
116            else int(f"{obs[0] + 6}{(obs[1] - 2) % 10}") if obs[2]
117            else int(f"{obs[0] - 4}{(obs[1] - 2) % 10}"))
118        env = CustomTransformObservation(env, self._transform_obs, gym.spaces.Discrete(290))
119        super().__init__(env)
120        current_dir = os.path.dirname(__file__)
121        file_name = 'blackjack-envP.pickle'
122        f = os.path.join(current_dir, file_name)
123        with open(f, "rb") as f:
124            self._P = pickle.load(f)
125
126    @property
127    def P(self):
128        """
129        Returns
130        ----------------------------
131        _P {dict}
132        """
133        return self._P
134
135    @property
136    def transform_obs(self):
137        """
138        Returns
139        ----------------------------
140        _transform_obs {lambda}
141        """
142        return self._transform_obs

Wraps a gymnasium.Env to allow a modular transformation of the step() and reset() methods.

This class is the base class of all wrappers to change the behavior of the underlying environment. Wrappers that inherit from this class can modify the action_space, observation_space, reward_range and metadata attributes, without changing the underlying environment's attributes. Moreover, the behavior of the step() and reset() methods can be changed by these wrappers.

Some attributes (spec, render_mode, np_random) will point back to the wrapper's environment (i.e. to the corresponding attributes of env).

Note: If you inherit from Wrapper, don't forget to call super().__init__(env)

BlackjackWrapper(env)
 98    def __init__(self, env):
 99        """
100        Blackjack wrapper that modifies the observation space and creates a transition/reward matrix P.
101
102        Parameters
103        ----------------------------
104        env {gymnasium.Env}:
105            Blackjack base environment
106
107        Explanation of _transform_obs lambda:
108        Lambda function assigned to the variable `self._convert_state_obs` takes parameter, `state` and
109        converts the input into a compact single integer value by concatenating player hand with dealer card.
110        See comments above for further information.
111
112        """
113        self._transform_obs = lambda obs: (
114            int(f"{28}{(obs[1] - 2) % 10}") if (obs[0] == 21 and obs[2])
115            else int(f"{27}{(obs[1] - 2) % 10}") if (obs[0] == 21 and not obs[2])
116            else int(f"{obs[0] + 6}{(obs[1] - 2) % 10}") if obs[2]
117            else int(f"{obs[0] - 4}{(obs[1] - 2) % 10}"))
118        env = CustomTransformObservation(env, self._transform_obs, gym.spaces.Discrete(290))
119        super().__init__(env)
120        current_dir = os.path.dirname(__file__)
121        file_name = 'blackjack-envP.pickle'
122        f = os.path.join(current_dir, file_name)
123        with open(f, "rb") as f:
124            self._P = pickle.load(f)

Blackjack wrapper that modifies the observation space and loads a transition/reward matrix P from a pickle file.

Parameters
  • env {gymnasium.Env}: Blackjack base environment

Explanation of the _transform_obs lambda: the lambda assigned to self._transform_obs takes one parameter, obs, and converts it into a compact single integer value by concatenating the player hand index with the dealer card index. See the module-level notes for the index tables.
P
126    @property
127    def P(self):
128        """
129        Returns
130        ----------------------------
131        _P {dict}
132        """
133        return self._P
Returns
  • _P {dict}
transform_obs
135    @property
136    def transform_obs(self):
137        """
138        Returns
139        ----------------------------
140        _transform_obs {lambda}
141        """
142        return self._transform_obs
Returns
  • _transform_obs {lambda}