bettermdptools.utils.test_env
Author: John Mansfield
documentation added by: Gagandeep Randhawa
# -*- coding: utf-8 -*-
"""
Author: John Mansfield

documentation added by: Gagandeep Randhawa
"""

"""
Simulation of the agent's decision process after it has learned a policy.
"""

import gymnasium as gym
import numpy as np


class TestEnv:
    def __init__(self):
        pass

    @staticmethod
    def test_env(
        env,
        desc=None,
        render=False,
        n_iters=10,
        pi=None,
        user_input=False,
        convert_state_obs=lambda state: state,
    ):
        """
        Parameters
        ----------------------------
        env {OpenAI Gym Environment}: MDP problem

        desc {numpy array}: description of the environment (for custom environments)

        render {Boolean}, default = False: openAI human render mode

        n_iters {int}, default = 10: Number of iterations to simulate the agent for

        pi {lambda}: Policy used to calculate action value at a given state

        user_input {Boolean}, default = False: Prompt for letting user decide which action to take at a given state

        convert_state_obs {lambda}: Optionally used in environments where state observation is transformed.


        Returns
        ----------------------------
        test_scores {numpy array}:
            Log of rewards from each episode.
        """
        if render:
            # reinit environment in 'human' render_mode
            env_name = env.unwrapped.spec.id
            if desc is None:
                env = gym.make(env_name, render_mode="human")
            else:
                env = gym.make(env_name, desc=desc, render_mode="human")
        n_actions = env.action_space.n
        test_scores = np.full([n_iters], np.nan)
        for i in range(0, n_iters):
            state, info = env.reset()
            done = False
            state = convert_state_obs(state)
            total_reward = 0
            while not done:
                if user_input:
                    # get user input and suggest policy output
                    print("state is %i" % state)
                    print("policy output is %i" % pi[state])
                    while True:
                        action = input(
                            "Please select 0 - %i then hit enter:\n"
                            % int(n_actions - 1)
                        )
                        try:
                            action = int(action)
                        except ValueError:
                            print("Please enter a number")
                            continue
                        if 0 <= action < n_actions:
                            break
                        else:
                            print(
                                "please enter a valid action, 0 - %i \n"
                                % int(n_actions - 1)
                            )
                else:
                    action = pi[state]
                next_state, reward, terminated, truncated, info = env.step(action)
                done = terminated or truncated
                next_state = convert_state_obs(next_state)
                state = next_state
                total_reward = reward + total_reward
            test_scores[i] = total_reward
        env.close()
        return test_scores
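For reference, a minimal usage sketch follows. It runs test_env on gymnasium's FrozenLake-v1 with a placeholder policy (a dictionary mapping every state to action 0); the environment choice and the always-left policy are illustrative assumptions only, and in practice pi would be the policy learned by a planner or RL agent.

import gymnasium as gym
import numpy as np

from bettermdptools.utils.test_env import TestEnv

env = gym.make("FrozenLake-v1")

# Placeholder policy for illustration only: always take action 0 (LEFT).
# A real policy would come from value iteration, Q-learning, etc.
pi = {s: 0 for s in range(env.observation_space.n)}

test_scores = TestEnv.test_env(env=env, n_iters=100, pi=pi)
print("mean episode reward:", np.nanmean(test_scores))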
class TestEnv
@staticmethod
def test_env(env, desc=None, render=False, n_iters=10, pi=None, user_input=False, convert_state_obs=lambda state: state):
Parameters
env {OpenAI Gym Environment}: MDP problem to simulate.
desc {numpy array}: Description of the environment (for custom environments); only used when render=True to rebuild the environment.
render {Boolean}, default = False: Re-create the environment in Gymnasium's "human" render mode.
n_iters {int}, default = 10: Number of episodes to simulate the agent for.
pi {lambda}: Policy mapping each state to an action (indexed as pi[state]).
user_input {Boolean}, default = False: Prompt the user to choose the action at each state instead of following the policy.
convert_state_obs {lambda}: Optionally used in environments where the state observation is transformed before indexing the policy.
Returns
test_scores {numpy array}: Total reward collected in each of the n_iters episodes.
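When an environment's observations are not already single integer indices, convert_state_obs supplies the mapping into the state index the policy expects. A hedged sketch for Blackjack-v1 is shown below; the flattening function and the always-stick placeholder policy are illustrative assumptions and must match whatever encoding was used when the policy was learned.

import gymnasium as gym

from bettermdptools.utils.test_env import TestEnv

env = gym.make("Blackjack-v1")

# Illustrative flattening of the (player_sum, dealer_card, usable_ace) tuple
# into one integer index; any injective mapping consistent with training works.
flatten = lambda obs: obs[0] * 22 + obs[1] * 2 + int(obs[2])

# Placeholder policy for illustration only: always stick (action 0).
# The range is simply large enough to cover all flattened indices.
pi = {s: 0 for s in range(1000)}

test_scores = TestEnv.test_env(env=env, n_iters=50, pi=pi, convert_state_obs=flatten)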