utils.test_env

Author: John Mansfield

documentation added by: Gagandeep Randhawa

Simulation of the agent's decision process after it has learned a policy.

# -*- coding: utf-8 -*-
"""
Author: John Mansfield

documentation added by: Gagandeep Randhawa

Simulation of the agent's decision process after it has learned a policy.
"""

import gymnasium as gym
import pygame
import numpy as np


class TestEnv:
    def __init__(self):
        pass

    @staticmethod
    def test_env(env, desc=None, render=False, n_iters=10, pi=None, user_input=False, convert_state_obs=lambda state: state):
        """
        Parameters
        ----------------------------
        env {gymnasium.Env}: MDP problem

        desc {numpy array}: description of the environment (for custom environments)

        render {Boolean}, default = False: Gymnasium 'human' render mode

        n_iters {int}, default = 10: Number of episodes to simulate the agent for

        pi {dict or array-like}: Policy mapping each state to an action (indexed as pi[state])

        user_input {Boolean}, default = False: Prompt the user to choose the action at each state

        convert_state_obs {lambda}: Optionally used in environments where the state observation is transformed

        Returns
        ----------------------------
        test_scores {numpy array}:
            Log of the total reward from each episode.
        """
        if render:
            # reinitialize the environment in 'human' render_mode
            env_name = env.unwrapped.spec.id
            if desc is None:
                env = gym.make(env_name, render_mode='human')
            else:
                env = gym.make(env_name, desc=desc, render_mode='human')
        n_actions = env.action_space.n
        test_scores = np.full([n_iters], np.nan)
        for i in range(n_iters):
            state, info = env.reset()
            done = False
            state = convert_state_obs(state)
            total_reward = 0
            while not done:
                if user_input:
                    # show the current state and the policy's suggested action
                    print("state is %i" % state)
                    print("policy output is %i" % pi[state])
                    while True:
                        action = input("Please select 0 - %i then hit enter:\n" % (n_actions - 1))
                        try:
                            action = int(action)
                        except ValueError:
                            print("Please enter a number")
                            continue
                        if 0 <= action < n_actions:
                            break
                        print("Please enter a valid action, 0 - %i\n" % (n_actions - 1))
                else:
                    action = pi[state]
                next_state, reward, terminated, truncated, info = env.step(action)
                done = terminated or truncated
                state = convert_state_obs(next_state)
                total_reward += reward
            test_scores[i] = total_reward
        env.close()
        return test_scores
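A minimal usage sketch follows. The random stand-in policy is hypothetical; in practice pi would come from a planner or an RL algorithm, and any object indexable as pi[state] (a dict or an array) works:

from utils.test_env import TestEnv
import gymnasium as gym
import numpy as np

env = gym.make("FrozenLake-v1")

# Hypothetical stand-in policy: a random action for each discrete state.
rng = np.random.default_rng(0)
pi = {s: int(rng.integers(env.action_space.n)) for s in range(env.observation_space.n)}

scores = TestEnv.test_env(env, n_iters=10, pi=pi)
print("mean episode reward:", np.nanmean(scores))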
TestEnv.test_env(env, desc=None, render=False, n_iters=10, pi=None, user_input=False, convert_state_obs=lambda state: state)
Parameters
  • env {gymnasium.Env}: MDP problem.

  • desc {numpy array}: Description of the environment (for custom environments).

  • render {Boolean}, default = False: Gymnasium 'human' render mode.

  • n_iters {int}, default = 10: Number of episodes to simulate the agent for.

  • pi {dict or array-like}: Policy mapping each state to an action (indexed as pi[state]).

  • user_input {Boolean}, default = False: Prompt the user to choose the action at each state.

  • convert_state_obs {lambda}: Optionally used in environments where the state observation is transformed.

Returns
  • test_scores {numpy array}: Log of the total reward from each episode.
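As an illustration of convert_state_obs: Blackjack-v1 returns tuple observations, which must be flattened to a single integer before pi[state] lookups work. The encoding below is one hypothetical flattening based on the environment's Discrete(32) x Discrete(11) x Discrete(2) observation space, not a fixed convention:

# Blackjack-v1 observations are (player_sum, dealer_card, usable_ace) tuples.
# Flatten each tuple to a unique integer index; the exact encoding is illustrative.
def convert_blackjack_obs(state):
    player_sum, dealer_card, usable_ace = state
    return (player_sum * 11 + dealer_card) * 2 + int(usable_ace)

# scores = TestEnv.test_env(env, pi=pi, convert_state_obs=convert_blackjack_obs)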