bettermdptools.utils.test_env

Author: John Mansfield

Documentation added by: Gagandeep Randhawa

# -*- coding: utf-8 -*-
"""
Author: John Mansfield

Documentation added by: Gagandeep Randhawa
"""

"""
Simulation of the agent's decision process after it has learned a policy.
"""

import gymnasium as gym
import numpy as np


class TestEnv:
    def __init__(self):
        pass

    @staticmethod
    def test_env(
        env,
        desc=None,
        render=False,
        n_iters=10,
        pi=None,
        user_input=False,
        convert_state_obs=lambda state: state,
    ):
        """
        Parameters
        ----------------------------
        env {gymnasium environment}: MDP problem

        desc {numpy array}: description of the environment (for custom environments)

        render {Boolean}, default = False: render the environment in gymnasium's 'human' mode

        n_iters {int}, default = 10: number of episodes to simulate

        pi {lambda}: policy mapping each state to an action

        user_input {Boolean}, default = False: prompt the user to choose the action at each state

        convert_state_obs {lambda}: optional transform for environments whose state observations need conversion

        Returns
        ----------------------------
        test_scores {numpy array}:
            Log of the total reward from each episode.
        """
        if render:
            # reinit environment in 'human' render_mode
            env_name = env.unwrapped.spec.id
            if desc is None:
                env = gym.make(env_name, render_mode="human")
            else:
                env = gym.make(env_name, desc=desc, render_mode="human")
        n_actions = env.action_space.n
        test_scores = np.full([n_iters], np.nan)
        for i in range(n_iters):
            state, info = env.reset()
            done = False
            state = convert_state_obs(state)
            total_reward = 0
            while not done:
                if user_input:
                    # get user input and suggest policy output
                    print("state is %i" % state)
                    print("policy output is %i" % pi[state])
                    while True:
                        action = input(
                            "Please select 0 - %i then hit enter:\n"
                            % int(n_actions - 1)
                        )
                        try:
                            action = int(action)
                        except ValueError:
                            print("Please enter a number")
                            continue
                        if 0 <= action < n_actions:
                            break
                        print(
                            "Please enter a valid action, 0 - %i\n"
                            % int(n_actions - 1)
                        )
                else:
                    action = pi[state]
                next_state, reward, terminated, truncated, info = env.step(action)
                done = terminated or truncated
                state = convert_state_obs(next_state)
                total_reward += reward
            test_scores[i] = total_reward
        env.close()
        return test_scores
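
For orientation, a minimal usage sketch: plain value iteration on FrozenLake's transition model (exposed by gymnasium's toy-text environments as env.unwrapped.P) produces a greedy policy, which test_env then scores. The value-iteration loop is illustrative only, not bettermdptools' own planner.

import gymnasium as gym
import numpy as np

from bettermdptools.utils.test_env import TestEnv

env = gym.make("FrozenLake-v1")
# gymnasium's toy-text envs expose the transition model:
# P[s][a] -> list of (prob, next_state, reward, terminated) tuples
P = env.unwrapped.P
n_states, n_actions = env.observation_space.n, env.action_space.n

# Plain value iteration, shown only to produce a policy for the demo.
V = np.zeros(n_states)
gamma, theta = 0.99, 1e-8
while True:
    Q = np.array(
        [
            [
                sum(p * (r + gamma * V[s2] * (not t)) for p, s2, r, t in P[s][a])
                for a in range(n_actions)
            ]
            for s in range(n_states)
        ]
    )
    V_new = Q.max(axis=1)
    if np.max(np.abs(V_new - V)) < theta:
        break
    V = V_new

# test_env indexes pi with the state, so any array-like mapping works.
pi = Q.argmax(axis=1)
scores = TestEnv.test_env(env=env, n_iters=100, pi=pi)
print("mean episode reward:", np.nanmean(scores))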
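
Because render=True rebuilds the environment via gym.make, the desc argument is what carries a custom map across that rebuild. A sketch, assuming gymnasium's generate_random_map helper and a pygame install for the human renderer; the random policy is a stand-in for a learned one.

import gymnasium as gym
import numpy as np
from gymnasium.envs.toy_text.frozen_lake import generate_random_map

from bettermdptools.utils.test_env import TestEnv

# Custom 8x8 map; test_env forwards desc to gym.make when it re-creates
# the environment in 'human' render mode, so the same map stays on screen.
desc = generate_random_map(size=8)
env = gym.make("FrozenLake-v1", desc=desc)

# A random policy stands in for a learned one in this sketch.
pi = np.random.randint(env.action_space.n, size=env.observation_space.n)

scores = TestEnv.test_env(env=env, desc=desc, render=True, n_iters=3, pi=pi)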
Parameters
  • env {gymnasium environment}: MDP problem
  • desc {numpy array}: description of the environment (for custom environments)
  • render {Boolean}, default = False: render the environment in gymnasium's 'human' mode
  • n_iters {int}, default = 10: number of episodes to simulate
  • pi {lambda}: policy mapping each state to an action
  • user_input {Boolean}, default = False: prompt the user to choose the action at each state
  • convert_state_obs {lambda}: optional transform for environments whose state observations need conversion

Returns
  • test_scores {numpy array}: log of the total reward from each episode
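
The convert_state_obs hook matters when observations are not already integer indices. A sketch, assuming gymnasium's Blackjack-v1, whose observations are (player_sum, dealer_card, usable_ace) tuples; the flattening below is one arbitrary indexing choice, not a bettermdptools convention.

import gymnasium as gym
import numpy as np

from bettermdptools.utils.test_env import TestEnv

env = gym.make("Blackjack-v1")

# Flatten (player_sum 0-31, dealer_card 1-10, usable_ace 0/1) into one index.
flatten = lambda obs: int(obs[0]) * 20 + (int(obs[1]) - 1) * 2 + int(obs[2])

# pi must then be indexable by the flattened state: 32 * 10 * 2 = 640 entries.
# A trivial always-stick policy (action 0) stands in for a learned one here.
pi = np.zeros(640, dtype=int)

scores = TestEnv.test_env(env=env, n_iters=10, pi=pi, convert_state_obs=flatten)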