bettermdptools.envs.blackjack_wrapper

Author: John Mansfield. License: BSD 3-Clause.

Transitions and rewards matrix from: https://github.com/rhalbersma/gym-blackjack-v1

Observations:

There are 29 * 10 = 290 discrete observable states:

29 player hands: H4-H21, S12-S21, BJ (0-28)

H4 = 0

H5 = 1

H6 = 2

H7 = 3

H8 = 4

H9 = 5

H10 = 6

H11 = 7

H12 = 8

H13 = 9

H14 = 10

H15 = 11

H16 = 12

H17 = 13

H18 = 14

H19 = 15

H20 = 16

H21 = 17

S12 = 18

S13 = 19

S14 = 20

S15 = 21

S16 = 22

S17 = 23

S18 = 24

S19 = 25

S20 = 26

S21 = 27

BJ = 28

Concatenated with 10 dealer cards: 2-9, T, A (0-9)

_2 = 0

_3 = 1

_4 = 2

_5 = 3

_6 = 4

_7 = 5

_8 = 6

_9 = 7

_T = 8 # 10, J, Q, K are all denoted as T

_A = 9

  1"""
  2Author: John Mansfield
  3BSD 3-Clause License
  4
  5# Transitions and rewards matrix from: https://github.com/rhalbersma/gym-blackjack-v1
  6#    Observations:
  7#   There are 29 * 10 = 290 discrete observable states:
  8#    29 player hands: H4-H21, S12-S21, BJ (0-28)
  9#     H4   =  0
 10#     H5   =  1
 11#     H6   =  2
 12#     H7   =  3
 13#     H8   =  4
 14#     H9   =  5
 15#     H10  =  6
 16#     H11  =  7
 17#     H12  =  8
 18#     H13  =  9
 19#     H14  = 10
 20#     H15  = 11
 21#     H16  = 12
 22#     H17  = 13
 23#     H18  = 14
 24#     H19  = 15
 25#     H20  = 16
 26#     H21  = 17
 27#     S12  = 18
 28#     S13  = 19
 29#     S14  = 20
 30#     S15  = 21
 31#     S16  = 22
 32#     S17  = 23
 33#     S18  = 24
 34#     S19  = 25
 35#     S20  = 26
 36#     S21  = 27
 37#     BJ   = 28
 38#    Concatenated with 10 dealer cards: 2-9, T, A (0-9)
 39#     _2 = 0
 40#     _3 = 1
 41#     _4 = 2
 42#     _5 = 3
 43#     _6 = 4
 44#     _7 = 5
 45#     _8 = 6
 46#     _9 = 7
 47#     _T = 8 # 10, J, Q, K are all denoted as T
 48#     _A = 9
 49"""
 50
 51import os
 52import pickle
 53
 54import gymnasium as gym
 55
 56
 57class CustomTransformObservation(gym.ObservationWrapper):
 58    def __init__(self, env, func, observation_space):
 59        """
 60        Helper class that modifies the observation space. The v26 gymnasium TransformObservation wrapper does not
 61        accept an observation_space parameter, which is needed in order to match the lambda conversion (tuple->int).
 62        Instead, we subclass gym.ObservationWrapper (parent class of gym.TransformObservation)
 63        to set both the conversion function and new observation space.
 64
 65        Parameters
 66        ----------
 67        env : gymnasium.Env
 68            Blackjack base environment to be wrapped.
 69        func : lambda
 70            Function that converts the observation.
 71        observation_space : gymnasium.spaces.Space
 72            New observation space.
 73        """
 74        super().__init__(env)
 75        if observation_space is not None:
 76            self.observation_space = observation_space
 77        self.func = func
 78
 79    def observation(self, observation):
 80        """
 81        Applies the conversion function to each observation returned by the environment's
 82        reset and step functions before it is passed back to the user.
 83
 84        Parameters
 85        ----------
 86        observation : Tuple
 87            Blackjack base environment observation tuple.
 88
 89        Returns
 90        -------
 91        int
 92            The converted observation (290 discrete observable states).
 93        """
 94        return self.func(observation)
 95
 96
 97class BlackjackWrapper(gym.Wrapper):
 98    def __init__(self, env):
 99        """
100        Blackjack wrapper that modifies the observation space and creates a transition/reward matrix P.
101
102        Parameters
103        ----------
104        env : gymnasium.Env
105            Blackjack base environment.
106
107        Explanation of _transform_obs lambda:
108        The lambda assigned to `self._transform_obs` takes one parameter, `obs`, and
109        converts it into a compact single integer by concatenating the player-hand index with the dealer-card index.
110        See the module docstring at the top of this file for the index tables.
111        """
112        self._transform_obs = lambda obs: (
113            int(f"{28}{(obs[1] - 2) % 10}")
114            if (obs[0] == 21 and obs[2])
115            else int(f"{27}{(obs[1] - 2) % 10}")
116            if (obs[0] == 21 and not obs[2])
117            else int(f"{obs[0] + 6}{(obs[1] - 2) % 10}")
118            if obs[2]
119            else int(f"{obs[0] - 4}{(obs[1] - 2) % 10}")
120        )
121        env = CustomTransformObservation(
122            env, self._transform_obs, gym.spaces.Discrete(290)
123        )
124        super().__init__(env)
125        current_dir = os.path.dirname(__file__)
126        file_name = "blackjack-envP.pickle"
127        f = os.path.join(current_dir, file_name)
128        with open(f, "rb") as f:
129            self._P = pickle.load(f)
130
131    @property
132    def P(self):
133        """
134        Returns
135        -------
136        dict
137            Transition/reward matrix.
138        """
139        return self._P
140
141    @property
142    def transform_obs(self):
143        """
144        Returns
145        -------
146        lambda
147            Function that converts the observation.
148        """
149        return self._transform_obs
class CustomTransformObservation(gymnasium.core.Wrapper[~WrapperObsType, ~ActType]):
58class CustomTransformObservation(gym.ObservationWrapper):
59    def __init__(self, env, func, observation_space):
60        """
61        Helper class that modifies the observation space. The v26 gymnasium TransformObservation wrapper does not
62        accept an observation_space parameter, which is needed in order to match the lambda conversion (tuple->int).
63        Instead, we subclass gym.ObservationWrapper (parent class of gym.TransformObservation)
64        to set both the conversion function and new observation space.
65
66        Parameters
67        ----------
68        env : gymnasium.Env
69            Blackjack base environment to be wrapped.
70        func : lambda
71            Function that converts the observation.
72        observation_space : gymnasium.spaces.Space
73            New observation space.
74        """
75        super().__init__(env)
76        if observation_space is not None:
77            self.observation_space = observation_space
78        self.func = func
79
80    def observation(self, observation):
81        """
82        Applies the conversion function to each observation returned by the environment's
83        reset and step functions before it is passed back to the user.
84
85        Parameters
86        ----------
87        observation : Tuple
88            Blackjack base environment observation tuple.
89
90        Returns
91        -------
92        int
93            The converted observation (290 discrete observable states).
94        """
95        return self.func(observation)

Superclass of wrappers that can modify observations using observation() for reset() and step().

If you would like to apply a function to only the observation before passing it to the learning code, you can simply inherit from ObservationWrapper and overwrite the method observation() to implement that transformation. The transformation defined in that method must be reflected by the env observation space. Otherwise, you need to specify the new observation space of the wrapper by setting self.observation_space in the __init__() method of your wrapper.

Among others, Gymnasium provides the observation wrapper TimeAwareObservation, which adds information about the index of the timestep to the observation.

CustomTransformObservation(env, func, observation_space)
59    def __init__(self, env, func, observation_space):
60        """
61        Helper class that modifies the observation space. The v26 gymnasium TransformObservation wrapper does not
62        accept an observation_space parameter, which is needed in order to match the lambda conversion (tuple->int).
63        Instead, we subclass gym.ObservationWrapper (parent class of gym.TransformObservation)
64        to set both the conversion function and new observation space.
65
66        Parameters
67        ----------
68        env : gymnasium.Env
69            Blackjack base environment to be wrapped.
70        func : lambda
71            Function that converts the observation.
72        observation_space : gymnasium.spaces.Space
73            New observation space.
74        """
75        super().__init__(env)
76        if observation_space is not None:
77            self.observation_space = observation_space
78        self.func = func

Helper class that modifies the observation space. The v26 gymnasium TransformObservation wrapper does not accept an observation_space parameter, which is needed in order to match the lambda conversion (tuple->int). Instead, we subclass gym.ObservationWrapper (parent class of gym.TransformObservation) to set both the conversion function and new observation space.

Parameters
  • env (gymnasium.Env): Blackjack base environment to be wrapped.
  • func (lambda): Function that converts the observation.
  • observation_space (gymnasium.spaces.Space): New observation space.
func
def observation(self, observation):
80    def observation(self, observation):
81        """
82        Applies the conversion function to each observation returned by the environment's
83        reset and step functions before it is passed back to the user.
84
85        Parameters
86        ----------
87        observation : Tuple
88            Blackjack base environment observation tuple.
89
90        Returns
91        -------
92        int
93            The converted observation (290 discrete observable states).
94        """
95        return self.func(observation)

Applies the conversion function to each observation returned by the environment's reset and step functions before it is passed back to the user.

Parameters
  • observation (Tuple): Blackjack base environment observation tuple.
Returns
  • int: The converted observation (290 discrete observable states).
class BlackjackWrapper(gymnasium.core.Env[~WrapperObsType, ~WrapperActType]):
 98class BlackjackWrapper(gym.Wrapper):
 99    def __init__(self, env):
100        """
101        Blackjack wrapper that modifies the observation space and creates a transition/reward matrix P.
102
103        Parameters
104        ----------
105        env : gymnasium.Env
106            Blackjack base environment.
107
108        Explanation of _transform_obs lambda:
109        The lambda assigned to `self._transform_obs` takes one parameter, `obs`, and
110        converts it into a compact single integer by concatenating the player-hand index with the dealer-card index.
111        See the module docstring at the top of this file for the index tables.
112        """
113        self._transform_obs = lambda obs: (
114            int(f"{28}{(obs[1] - 2) % 10}")
115            if (obs[0] == 21 and obs[2])
116            else int(f"{27}{(obs[1] - 2) % 10}")
117            if (obs[0] == 21 and not obs[2])
118            else int(f"{obs[0] + 6}{(obs[1] - 2) % 10}")
119            if obs[2]
120            else int(f"{obs[0] - 4}{(obs[1] - 2) % 10}")
121        )
122        env = CustomTransformObservation(
123            env, self._transform_obs, gym.spaces.Discrete(290)
124        )
125        super().__init__(env)
126        current_dir = os.path.dirname(__file__)
127        file_name = "blackjack-envP.pickle"
128        f = os.path.join(current_dir, file_name)
129        with open(f, "rb") as f:
130            self._P = pickle.load(f)
131
132    @property
133    def P(self):
134        """
135        Returns
136        -------
137        dict
138            Transition/reward matrix.
139        """
140        return self._P
141
142    @property
143    def transform_obs(self):
144        """
145        Returns
146        -------
147        lambda
148            Function that converts the observation.
149        """
150        return self._transform_obs

Wraps a gymnasium.Env to allow a modular transformation of the step() and reset() methods.

This class is the base class of all wrappers to change the behavior of the underlying environment. Wrappers that inherit from this class can modify the action_space, observation_space, reward_range and metadata attributes, without changing the underlying environment's attributes. Moreover, the behavior of the step() and reset() methods can be changed by these wrappers.

Some attributes (spec, render_mode, np_random) will point back to the wrapper's environment (i.e. to the corresponding attributes of env).

Note: If you inherit from Wrapper, don't forget to call super().__init__(env)

BlackjackWrapper(env)
 99    def __init__(self, env):
100        """
101        Blackjack wrapper that modifies the observation space and creates a transition/reward matrix P.
102
103        Parameters
104        ----------
105        env : gymnasium.Env
106            Blackjack base environment.
107
108        Explanation of _transform_obs lambda:
109        The lambda assigned to `self._transform_obs` takes one parameter, `obs`, and
110        converts it into a compact single integer by concatenating the player-hand index with the dealer-card index.
111        See the module docstring at the top of this file for the index tables.
112        """
113        self._transform_obs = lambda obs: (
114            int(f"{28}{(obs[1] - 2) % 10}")
115            if (obs[0] == 21 and obs[2])
116            else int(f"{27}{(obs[1] - 2) % 10}")
117            if (obs[0] == 21 and not obs[2])
118            else int(f"{obs[0] + 6}{(obs[1] - 2) % 10}")
119            if obs[2]
120            else int(f"{obs[0] - 4}{(obs[1] - 2) % 10}")
121        )
122        env = CustomTransformObservation(
123            env, self._transform_obs, gym.spaces.Discrete(290)
124        )
125        super().__init__(env)
126        current_dir = os.path.dirname(__file__)
127        file_name = "blackjack-envP.pickle"
128        f = os.path.join(current_dir, file_name)
129        with open(f, "rb") as f:
130            self._P = pickle.load(f)

Blackjack wrapper that modifies the observation space and creates a transition/reward matrix P.

Parameters
  • env (gymnasium.Env): Blackjack base environment.

Explanation of the _transform_obs lambda: the lambda assigned to self._transform_obs takes one parameter, obs, and converts it into a compact single integer by concatenating the player-hand index with the dealer-card index. See the module docstring at the top of this page for the index tables.
P
132    @property
133    def P(self):
134        """
135        Returns
136        -------
137        dict
138            Transition/reward matrix.
139        """
140        return self._P
Returns
  • dict: Transition/reward matrix.
transform_obs
142    @property
143    def transform_obs(self):
144        """
145        Returns
146        -------
147        lambda
148            Function that converts the observation.
149        """
150        return self._transform_obs
Returns
  • lambda: Function that converts the observation.