utils.test_env

Simulation of the agent's decision process after it has learned a policy.

Author: John Mansfield
Documentation added by: Gagandeep Randhawa
# -*- coding: utf-8 -*-
"""
Author: John Mansfield

documentation added by: Gagandeep Randhawa

Simulation of the agent's decision process after it has learned a policy.
"""

import gymnasium as gym
import pygame  # required by Gymnasium's 'human' render mode
import numpy as np


class TestEnv:
    def __init__(self):
        pass

    @staticmethod
    def test_env(env, desc=None, render=False, n_iters=10, pi=None, user_input=False,
                 convert_state_obs=lambda state: state):
        """
        Parameters
        ----------------------------
        env {gymnasium.Env}: MDP problem to simulate

        desc {numpy array}: map description passed to gym.make (for custom environments)

        render {Boolean}, default = False: if True, re-create the environment in 'human' render mode

        n_iters {int}, default = 10: number of episodes to simulate

        pi {indexable}: policy mapping each state to an action; indexed as pi[state]

        user_input {Boolean}, default = False: prompt the user to choose the action at each state
        (the policy's suggestion is displayed)

        convert_state_obs {lambda}: applied to each raw observation to produce the state
        the policy expects (identity by default)

        Returns
        ----------------------------
        test_scores {numpy array}:
            Total reward accumulated in each episode.
        """
        if render:
            # Re-initialize the environment in 'human' render mode.
            env_name = env.unwrapped.spec.id
            if desc is None:
                env = gym.make(env_name, render_mode='human')
            else:
                env = gym.make(env_name, desc=desc, render_mode='human')
        n_actions = env.action_space.n
        test_scores = np.full([n_iters], np.nan)
        for i in range(n_iters):
            state, info = env.reset()
            done = False
            state = convert_state_obs(state)
            total_reward = 0
            while not done:
                if user_input:
                    # Show the current state and the policy's suggestion, then
                    # prompt until the user enters a valid action.
                    print("state is %i" % state)
                    print("policy output is %i" % pi[state])
                    while True:
                        action = input("Please select 0 - %i then hit enter:\n" % (n_actions - 1))
                        try:
                            action = int(action)
                        except ValueError:
                            print("Please enter a number")
                            continue
                        if 0 <= action < n_actions:
                            break
                        print("Please enter a valid action, 0 - %i\n" % (n_actions - 1))
                else:
                    action = pi[state]
                next_state, reward, terminated, truncated, info = env.step(action)
                done = terminated or truncated
                state = convert_state_obs(next_state)
                total_reward += reward
            test_scores[i] = total_reward
        env.close()
        return test_scores
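A minimal usage sketch. The FrozenLake-v1 environment and the all-zeros policy below are placeholder assumptions for illustration, not part of this module; in practice pi would come from a planner or learner, and any object that supports pi[state] (dict, list, numpy array) works.

import gymnasium as gym
import numpy as np
from utils.test_env import TestEnv

env = gym.make('FrozenLake-v1')

# Placeholder policy: always take action 0 (LEFT) in every state.
pi = np.zeros(env.observation_space.n, dtype=int)

test_scores = TestEnv.test_env(env, n_iters=5, pi=pi)
print("mean episode reward:", np.nanmean(test_scores))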
class TestEnv:
@staticmethod
def test_env(env, desc=None, render=False, n_iters=10, pi=None, user_input=False, convert_state_obs=lambda state: state)
Parameters

env {gymnasium.Env}: MDP problem to simulate.

desc {numpy array}: map description passed to gym.make (for custom environments).

render {Boolean}, default = False: if True, the environment is re-created in Gymnasium's 'human' render mode.

n_iters {int}, default = 10: number of episodes to simulate.

pi {indexable}: policy mapping each state to an action; indexed as pi[state].

user_input {Boolean}, default = False: prompt the user to choose the action at each state; the policy's suggestion is displayed.

convert_state_obs {lambda}: applied to each raw observation to produce the state the policy expects (identity by default).
Returns

test_scores {numpy array}: total reward accumulated in each episode.
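When observations are not already flat integer indices, convert_state_obs bridges the gap. A hedged sketch for Blackjack-v1, whose observations are (player_sum, dealer_card, usable_ace) tuples; the row-major flattening below is one possible encoding chosen for illustration, not a convention fixed by this module.

import gymnasium as gym
import numpy as np
from utils.test_env import TestEnv

env = gym.make('Blackjack-v1')

# Blackjack observations come from Discrete(32) x Discrete(11) x Discrete(2);
# flatten each tuple into a single index in [0, 32*11*2).
convert = lambda obs: obs[0] * 22 + obs[1] * 2 + int(obs[2])

pi = np.zeros(32 * 11 * 2, dtype=int)  # placeholder policy: always "stick" (action 0)
scores = TestEnv.test_env(env, n_iters=10, pi=pi, convert_state_obs=convert)
print("episode rewards:", scores)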