envs.pendulum_discretized
Author: Aleksandr Spiridonov BSD 3-Clause License
1""" 2Author: Aleksandr Spiridonov 3BSD 3-Clause License 4""" 5import numpy as np 6from bettermdptools.envs.binning import generate_bin_edges 7from gymnasium.envs.classic_control.pendulum import angle_normalize 8from gymnasium.envs.classic_control.acrobot import wrap 9import os 10import gzip 11import pickle 12from concurrent.futures import ProcessPoolExecutor, as_completed 13 14 15CACHED_P_PATH_FORMAT = 'cached_P_discretized_pendulum_{angle_bins}_{angular_velocity_bins}_{action_bins}.pkl.gz' 16 17def index_to_state( 18 index, 19 angle_bins, 20 angular_velocity_bins 21): 22 angle_idx = index // angular_velocity_bins 23 angular_velocity_idx = index % angular_velocity_bins 24 return angle_idx, angular_velocity_idx 25 26def index_to_continous_state( 27 index, 28 angle_bin_edges, 29 angular_velocity_bin_edges 30): 31 angle_idx, angular_velocity_idx = index_to_state(index, len(angle_bin_edges) - 1, len(angular_velocity_bin_edges) - 1) 32 angle = (angle_bin_edges[angle_idx] + angle_bin_edges[angle_idx + 1]) / 2.0 33 angular_velocity = (angular_velocity_bin_edges[angular_velocity_idx] + angular_velocity_bin_edges[angular_velocity_idx + 1]) / 2.0 34 return angle, angular_velocity 35 36def state_to_index( 37 angle_idx, 38 angular_velocity_idx, 39 angular_velocity_bins 40): 41 return angle_idx * angular_velocity_bins + angular_velocity_idx 42 43def get_torque_value(torque_bin_edges, action): 44 return (torque_bin_edges[action] + torque_bin_edges[action + 1]) / 2.0 45 46def compute_next_probable_states( 47 angle_idx, 48 angular_velocity_idx, 49 action, 50 angle_bin_edges, 51 angular_velocity_bin_edges, 52 torque_bin_edges, 53 num_samples=11, 54 g=10.0, 55 l=1.0, 56 m=1.0, 57 dt=0.05 58): 59 angle_low, angle_high = angle_bin_edges[angle_idx], angle_bin_edges[angle_idx + 1] 60 angular_velocity_low, angular_velocity_high = angular_velocity_bin_edges[angular_velocity_idx], angular_velocity_bin_edges[angular_velocity_idx + 1] 61 torque = get_torque_value(torque_bin_edges, action) 62 63 min_angular_velocity = angular_velocity_bin_edges[0] 64 max_angular_velocity = angular_velocity_bin_edges[-1] 65 66 angle_samples = np.linspace(angle_low, angle_high, num_samples) 67 angle_samples = angle_samples[1:-1] # Exclude the bin edges 68 angular_velocity_samples = np.linspace(angular_velocity_low, angular_velocity_high, num_samples) 69 angular_velocity_samples = angular_velocity_samples[1:-1] 70 71 angle_bins = len(angle_bin_edges) - 1 72 angular_velocity_bins = len(angular_velocity_bin_edges) - 1 73 74 next_states_and_rewards = {} 75 76 for angle in angle_samples: 77 for angular_velocity in angular_velocity_samples: 78 costs = angle_normalize(angle) ** 2 + 0.1 * angular_velocity ** 2 + 0.001 * (torque ** 2) 79 80 new_angular_velocity = angular_velocity + (3 * g / (2 * l) * np.sin(angle) + 3.0 / (m * l ** 2) * torque) * dt 81 new_angular_velocity = np.clip(new_angular_velocity, min_angular_velocity+1e-6, max_angular_velocity-1e-6) 82 83 new_angle = angle + new_angular_velocity * dt 84 new_angle = wrap(new_angle, -np.pi, np.pi) 85 86 new_angle_idx = np.digitize(new_angle, angle_bin_edges) - 1 87 new_angular_velocity_idx = np.digitize(new_angular_velocity, angular_velocity_bin_edges) - 1 88 89 new_state = state_to_index( 90 new_angle_idx, 91 new_angular_velocity_idx, 92 angular_velocity_bins) 93 94 if new_state < 0 or new_state >= angle_bins * angular_velocity_bins: 95 raise ValueError(f"Invalid state index: {new_state}") 96 97 terminated = False 98 99 summary = (new_state, -costs, terminated) 100 101 if new_state not in next_states_and_rewards: 102 next_states_and_rewards[new_state] = [] 103 next_states_and_rewards[new_state].append(summary) 104 105 n_total = len(angle_samples) * len(angular_velocity_samples) 106 107 results = [] 108 109 for new_state, summaries in next_states_and_rewards.items(): 110 n = len(summaries) 111 prob = n / n_total 112 ave_reward = sum(r for _, r, _ in summaries) / n 113 terminated = False 114 results.append((prob, new_state, ave_reward, terminated)) 115 116 return results 117 118def setup_transition_probabilities_for_state(args): 119 state, angle_bin_edges, angular_velocity_bin_edges, torque_bin_edges, dim_samples = args 120 angle_bins = len(angle_bin_edges) - 1 121 angular_velocity_bins = len(angular_velocity_bin_edges) - 1 122 action_bins = len(torque_bin_edges) - 1 123 124 P_state = {action: [] for action in range(action_bins)} 125 126 angle_idx, angular_velocity_idx = index_to_state(state, angle_bins, angular_velocity_bins) 127 128 for action in range(action_bins): 129 P_state[action] = compute_next_probable_states( 130 angle_idx, 131 angular_velocity_idx, 132 action, 133 angle_bin_edges, 134 angular_velocity_bin_edges, 135 torque_bin_edges, 136 num_samples=dim_samples 137 ) 138 139 try: 140 return (state, P_state) 141 except Exception as e: 142 print(f"Error in state {state}: {e}") 143 return None 144 145class DiscretizedPendulum: 146 """ 147 Initialize the DiscretizedPendulum environment. 148 Parameters: 149 ----------- 150 angle_bins : int 151 Number of bins to discretize the angle. 152 angular_velocity_bins : int 153 Number of bins to discretize the angular velocity. 154 torque_bins : int, optional (default=11) 155 Number of bins to discretize the torque. 156 n_workers : int, optional (default=4) 157 Number of worker processes to use for setting up transition probabilities. 158 cache_dir : str, optional (default='./cached') 159 Directory to cache the transition probabilities. 160 dim_samples : int, optional (default=11) 161 Number of samples to use for each dimension when setting up transition probabilities. 162 Attributes: 163 ----------- 164 angle_bins : int 165 Number of bins to discretize the angle. Must be odd. 166 angular_velocity_bins : int 167 Number of bins to discretize the angular velocity. Must be odd. 168 dim_samples : int 169 Number of samples to use for each dimension when setting up transition probabilities. 170 angle_bin_edges : numpy.ndarray 171 Edges of the bins for discretizing the angle. 172 angular_velocity_bin_edges : numpy.ndarray 173 Edges of the bins for discretizing the angular velocity. 174 torque_bin_edges : numpy.ndarray 175 Edges of the bins for discretizing the torque. 176 state_space : int 177 Total number of discrete states. 178 action_space : int 179 Total number of discrete actions. 180 P : dict 181 Transition probability matrix. 182 n_workers : int 183 Number of worker processes to use for setting up transition probabilities. 184 """ 185 def __init__( 186 self, 187 angle_bins, 188 angular_velocity_bins, 189 torque_bins=11, 190 n_workers=4, 191 cache_dir='./cached', 192 dim_samples=11 193 ): 194 self.angle_bins = angle_bins 195 self.angular_velocity_bins = angular_velocity_bins 196 self.dim_samples = dim_samples 197 self.angle_bin_edges = generate_bin_edges(np.pi, angle_bins, 3, center=True) 198 self.angular_velocity_bin_edges = generate_bin_edges(8, angular_velocity_bins, 3, center=False) 199 self.torque_bin_edges = generate_bin_edges(2, torque_bins, 3, center=False) 200 201 self.state_space = angle_bins * angular_velocity_bins 202 self.action_space = torque_bins 203 204 self.P = {state: {action: [] for action in range(torque_bins)} for state in range(self.state_space)} 205 206 self.n_workers = n_workers 207 208 cached_P_filepath = CACHED_P_PATH_FORMAT.format(angle_bins=angle_bins, angular_velocity_bins=angular_velocity_bins, action_bins=torque_bins) 209 cached_P_filepath = os.path.join(cache_dir, cached_P_filepath) 210 211 if not os.path.exists(cache_dir): 212 os.makedirs(cache_dir) 213 214 if os.path.exists(cached_P_filepath): 215 with gzip.open(cached_P_filepath, 'rb') as f: 216 self.P = pickle.load(f) 217 else: 218 self.setup_transition_probabilities() 219 with gzip.open(cached_P_filepath, 'wb') as f: 220 pickle.dump(self.P, f) 221 222 def discretize_angle(self, angle): 223 return np.digitize(angle, self.angle_bin_edges) - 1 224 225 def discretize_angular_velocity(self, angular_velocity): 226 return np.digitize(angular_velocity, self.angular_velocity_bin_edges) - 1 227 228 def index_to_state(self, index): 229 return index_to_state(index, self.angle_bins, self.angular_velocity_bins) 230 231 def state_to_index(self, angle_idx, angular_velocity_idx): 232 idx = state_to_index(angle_idx, angular_velocity_idx, self.angular_velocity_bins) 233 if idx < 0 or idx >= self.state_space: 234 raise ValueError(f"Invalid state index: {idx}") 235 return idx 236 237 def transform_cont_obs(self, cont_obs): 238 x = cont_obs[0] 239 y = cont_obs[1] 240 theta = np.arctan2(y, x) 241 theta = wrap(theta, -np.pi, np.pi) 242 theta_dot = cont_obs[2] 243 theta_dot = np.clip(theta_dot, -8+1e-6, 8-1e-6) 244 245 246 angle_idx = self.discretize_angle(theta) 247 angular_velocity_idx = self.discretize_angular_velocity(theta_dot) 248 249 return self.state_to_index(angle_idx, angular_velocity_idx) 250 251 def get_action_value(self, action): 252 return get_torque_value(self.torque_bin_edges, action) 253 254 def setup_transition_probabilities(self): 255 state_space_values = list(range(self.state_space)) 256 257 args = [ 258 ( 259 state, 260 self.angle_bin_edges, 261 self.angular_velocity_bin_edges, 262 self.torque_bin_edges, 263 self.dim_samples 264 ) 265 for state in state_space_values 266 ] 267 268 new_P = {} 269 270 args = [arg for arg in args if arg[0] not in new_P] 271 272 num_workers = self.n_workers 273 274 n_completed = len(new_P) 275 276 batch_size = 1000 277 278 with ProcessPoolExecutor(max_workers=num_workers) as executor: 279 for i in range(0, len(args), batch_size): 280 batch = args[i:i + batch_size] 281 futures = [executor.submit(setup_transition_probabilities_for_state, arg) for arg in batch] 282 for future in as_completed(futures): 283 n_completed += 1 284 try: 285 state, P_state = future.result() 286 new_P[state] = P_state 287 if n_completed % 100 == 0: 288 print(f'Completed {n_completed}/{self.state_space}') 289 except Exception as e: 290 print(f"Error in future: {e}") 291 print('task failed') 292 293 self.P = new_P 294 295if __name__ == '__main__': 296 n_bins = 31 297 angle_bins = n_bins 298 angular_velocity_bins = n_bins 299 300 discretized_pendulum = DiscretizedPendulum( 301 angle_bins=angle_bins, 302 angular_velocity_bins=angular_velocity_bins 303 ) 304 305 angle = np.pi / 2 306 angular_velocity = 3 307 308 obs = np.array([np.cos(angle), np.sin(angle), angular_velocity]) 309 310 state = discretized_pendulum.transform_cont_obs(obs) 311 print(f'Discretized state index: {state}') 312 313 for action in range(discretized_pendulum.action_space): 314 transitions = discretized_pendulum.P[state][action] 315 for prob, next_state, reward, terminated in transitions: 316 print(f'Action: {action}, Probability: {prob}, Next state: {next_state}, Reward: {reward}, Terminated: {terminated}')
27def index_to_continous_state( 28 index, 29 angle_bin_edges, 30 angular_velocity_bin_edges 31): 32 angle_idx, angular_velocity_idx = index_to_state(index, len(angle_bin_edges) - 1, len(angular_velocity_bin_edges) - 1) 33 angle = (angle_bin_edges[angle_idx] + angle_bin_edges[angle_idx + 1]) / 2.0 34 angular_velocity = (angular_velocity_bin_edges[angular_velocity_idx] + angular_velocity_bin_edges[angular_velocity_idx + 1]) / 2.0 35 return angle, angular_velocity
47def compute_next_probable_states( 48 angle_idx, 49 angular_velocity_idx, 50 action, 51 angle_bin_edges, 52 angular_velocity_bin_edges, 53 torque_bin_edges, 54 num_samples=11, 55 g=10.0, 56 l=1.0, 57 m=1.0, 58 dt=0.05 59): 60 angle_low, angle_high = angle_bin_edges[angle_idx], angle_bin_edges[angle_idx + 1] 61 angular_velocity_low, angular_velocity_high = angular_velocity_bin_edges[angular_velocity_idx], angular_velocity_bin_edges[angular_velocity_idx + 1] 62 torque = get_torque_value(torque_bin_edges, action) 63 64 min_angular_velocity = angular_velocity_bin_edges[0] 65 max_angular_velocity = angular_velocity_bin_edges[-1] 66 67 angle_samples = np.linspace(angle_low, angle_high, num_samples) 68 angle_samples = angle_samples[1:-1] # Exclude the bin edges 69 angular_velocity_samples = np.linspace(angular_velocity_low, angular_velocity_high, num_samples) 70 angular_velocity_samples = angular_velocity_samples[1:-1] 71 72 angle_bins = len(angle_bin_edges) - 1 73 angular_velocity_bins = len(angular_velocity_bin_edges) - 1 74 75 next_states_and_rewards = {} 76 77 for angle in angle_samples: 78 for angular_velocity in angular_velocity_samples: 79 costs = angle_normalize(angle) ** 2 + 0.1 * angular_velocity ** 2 + 0.001 * (torque ** 2) 80 81 new_angular_velocity = angular_velocity + (3 * g / (2 * l) * np.sin(angle) + 3.0 / (m * l ** 2) * torque) * dt 82 new_angular_velocity = np.clip(new_angular_velocity, min_angular_velocity+1e-6, max_angular_velocity-1e-6) 83 84 new_angle = angle + new_angular_velocity * dt 85 new_angle = wrap(new_angle, -np.pi, np.pi) 86 87 new_angle_idx = np.digitize(new_angle, angle_bin_edges) - 1 88 new_angular_velocity_idx = np.digitize(new_angular_velocity, angular_velocity_bin_edges) - 1 89 90 new_state = state_to_index( 91 new_angle_idx, 92 new_angular_velocity_idx, 93 angular_velocity_bins) 94 95 if new_state < 0 or new_state >= angle_bins * angular_velocity_bins: 96 raise ValueError(f"Invalid state index: {new_state}") 97 98 terminated = False 99 100 summary = (new_state, -costs, terminated) 101 102 if new_state not in next_states_and_rewards: 103 next_states_and_rewards[new_state] = [] 104 next_states_and_rewards[new_state].append(summary) 105 106 n_total = len(angle_samples) * len(angular_velocity_samples) 107 108 results = [] 109 110 for new_state, summaries in next_states_and_rewards.items(): 111 n = len(summaries) 112 prob = n / n_total 113 ave_reward = sum(r for _, r, _ in summaries) / n 114 terminated = False 115 results.append((prob, new_state, ave_reward, terminated)) 116 117 return results
119def setup_transition_probabilities_for_state(args): 120 state, angle_bin_edges, angular_velocity_bin_edges, torque_bin_edges, dim_samples = args 121 angle_bins = len(angle_bin_edges) - 1 122 angular_velocity_bins = len(angular_velocity_bin_edges) - 1 123 action_bins = len(torque_bin_edges) - 1 124 125 P_state = {action: [] for action in range(action_bins)} 126 127 angle_idx, angular_velocity_idx = index_to_state(state, angle_bins, angular_velocity_bins) 128 129 for action in range(action_bins): 130 P_state[action] = compute_next_probable_states( 131 angle_idx, 132 angular_velocity_idx, 133 action, 134 angle_bin_edges, 135 angular_velocity_bin_edges, 136 torque_bin_edges, 137 num_samples=dim_samples 138 ) 139 140 try: 141 return (state, P_state) 142 except Exception as e: 143 print(f"Error in state {state}: {e}") 144 return None
146class DiscretizedPendulum: 147 """ 148 Initialize the DiscretizedPendulum environment. 149 Parameters: 150 ----------- 151 angle_bins : int 152 Number of bins to discretize the angle. 153 angular_velocity_bins : int 154 Number of bins to discretize the angular velocity. 155 torque_bins : int, optional (default=11) 156 Number of bins to discretize the torque. 157 n_workers : int, optional (default=4) 158 Number of worker processes to use for setting up transition probabilities. 159 cache_dir : str, optional (default='./cached') 160 Directory to cache the transition probabilities. 161 dim_samples : int, optional (default=11) 162 Number of samples to use for each dimension when setting up transition probabilities. 163 Attributes: 164 ----------- 165 angle_bins : int 166 Number of bins to discretize the angle. Must be odd. 167 angular_velocity_bins : int 168 Number of bins to discretize the angular velocity. Must be odd. 169 dim_samples : int 170 Number of samples to use for each dimension when setting up transition probabilities. 171 angle_bin_edges : numpy.ndarray 172 Edges of the bins for discretizing the angle. 173 angular_velocity_bin_edges : numpy.ndarray 174 Edges of the bins for discretizing the angular velocity. 175 torque_bin_edges : numpy.ndarray 176 Edges of the bins for discretizing the torque. 177 state_space : int 178 Total number of discrete states. 179 action_space : int 180 Total number of discrete actions. 181 P : dict 182 Transition probability matrix. 183 n_workers : int 184 Number of worker processes to use for setting up transition probabilities. 185 """ 186 def __init__( 187 self, 188 angle_bins, 189 angular_velocity_bins, 190 torque_bins=11, 191 n_workers=4, 192 cache_dir='./cached', 193 dim_samples=11 194 ): 195 self.angle_bins = angle_bins 196 self.angular_velocity_bins = angular_velocity_bins 197 self.dim_samples = dim_samples 198 self.angle_bin_edges = generate_bin_edges(np.pi, angle_bins, 3, center=True) 199 self.angular_velocity_bin_edges = generate_bin_edges(8, angular_velocity_bins, 3, center=False) 200 self.torque_bin_edges = generate_bin_edges(2, torque_bins, 3, center=False) 201 202 self.state_space = angle_bins * angular_velocity_bins 203 self.action_space = torque_bins 204 205 self.P = {state: {action: [] for action in range(torque_bins)} for state in range(self.state_space)} 206 207 self.n_workers = n_workers 208 209 cached_P_filepath = CACHED_P_PATH_FORMAT.format(angle_bins=angle_bins, angular_velocity_bins=angular_velocity_bins, action_bins=torque_bins) 210 cached_P_filepath = os.path.join(cache_dir, cached_P_filepath) 211 212 if not os.path.exists(cache_dir): 213 os.makedirs(cache_dir) 214 215 if os.path.exists(cached_P_filepath): 216 with gzip.open(cached_P_filepath, 'rb') as f: 217 self.P = pickle.load(f) 218 else: 219 self.setup_transition_probabilities() 220 with gzip.open(cached_P_filepath, 'wb') as f: 221 pickle.dump(self.P, f) 222 223 def discretize_angle(self, angle): 224 return np.digitize(angle, self.angle_bin_edges) - 1 225 226 def discretize_angular_velocity(self, angular_velocity): 227 return np.digitize(angular_velocity, self.angular_velocity_bin_edges) - 1 228 229 def index_to_state(self, index): 230 return index_to_state(index, self.angle_bins, self.angular_velocity_bins) 231 232 def state_to_index(self, angle_idx, angular_velocity_idx): 233 idx = state_to_index(angle_idx, angular_velocity_idx, self.angular_velocity_bins) 234 if idx < 0 or idx >= self.state_space: 235 raise ValueError(f"Invalid state index: {idx}") 236 return idx 237 238 def transform_cont_obs(self, cont_obs): 239 x = cont_obs[0] 240 y = cont_obs[1] 241 theta = np.arctan2(y, x) 242 theta = wrap(theta, -np.pi, np.pi) 243 theta_dot = cont_obs[2] 244 theta_dot = np.clip(theta_dot, -8+1e-6, 8-1e-6) 245 246 247 angle_idx = self.discretize_angle(theta) 248 angular_velocity_idx = self.discretize_angular_velocity(theta_dot) 249 250 return self.state_to_index(angle_idx, angular_velocity_idx) 251 252 def get_action_value(self, action): 253 return get_torque_value(self.torque_bin_edges, action) 254 255 def setup_transition_probabilities(self): 256 state_space_values = list(range(self.state_space)) 257 258 args = [ 259 ( 260 state, 261 self.angle_bin_edges, 262 self.angular_velocity_bin_edges, 263 self.torque_bin_edges, 264 self.dim_samples 265 ) 266 for state in state_space_values 267 ] 268 269 new_P = {} 270 271 args = [arg for arg in args if arg[0] not in new_P] 272 273 num_workers = self.n_workers 274 275 n_completed = len(new_P) 276 277 batch_size = 1000 278 279 with ProcessPoolExecutor(max_workers=num_workers) as executor: 280 for i in range(0, len(args), batch_size): 281 batch = args[i:i + batch_size] 282 futures = [executor.submit(setup_transition_probabilities_for_state, arg) for arg in batch] 283 for future in as_completed(futures): 284 n_completed += 1 285 try: 286 state, P_state = future.result() 287 new_P[state] = P_state 288 if n_completed % 100 == 0: 289 print(f'Completed {n_completed}/{self.state_space}') 290 except Exception as e: 291 print(f"Error in future: {e}") 292 print('task failed') 293 294 self.P = new_P
Initialize the DiscretizedPendulum environment.
Parameters:
angle_bins : int Number of bins to discretize the angle. angular_velocity_bins : int Number of bins to discretize the angular velocity. torque_bins : int, optional (default=11) Number of bins to discretize the torque. n_workers : int, optional (default=4) Number of worker processes to use for setting up transition probabilities. cache_dir : str, optional (default='./cached') Directory to cache the transition probabilities. dim_samples : int, optional (default=11) Number of samples to use for each dimension when setting up transition probabilities.
Attributes:
angle_bins : int Number of bins to discretize the angle. Must be odd. angular_velocity_bins : int Number of bins to discretize the angular velocity. Must be odd. dim_samples : int Number of samples to use for each dimension when setting up transition probabilities. angle_bin_edges : numpy.ndarray Edges of the bins for discretizing the angle. angular_velocity_bin_edges : numpy.ndarray Edges of the bins for discretizing the angular velocity. torque_bin_edges : numpy.ndarray Edges of the bins for discretizing the torque. state_space : int Total number of discrete states. action_space : int Total number of discrete actions. P : dict Transition probability matrix. n_workers : int Number of worker processes to use for setting up transition probabilities.
186 def __init__( 187 self, 188 angle_bins, 189 angular_velocity_bins, 190 torque_bins=11, 191 n_workers=4, 192 cache_dir='./cached', 193 dim_samples=11 194 ): 195 self.angle_bins = angle_bins 196 self.angular_velocity_bins = angular_velocity_bins 197 self.dim_samples = dim_samples 198 self.angle_bin_edges = generate_bin_edges(np.pi, angle_bins, 3, center=True) 199 self.angular_velocity_bin_edges = generate_bin_edges(8, angular_velocity_bins, 3, center=False) 200 self.torque_bin_edges = generate_bin_edges(2, torque_bins, 3, center=False) 201 202 self.state_space = angle_bins * angular_velocity_bins 203 self.action_space = torque_bins 204 205 self.P = {state: {action: [] for action in range(torque_bins)} for state in range(self.state_space)} 206 207 self.n_workers = n_workers 208 209 cached_P_filepath = CACHED_P_PATH_FORMAT.format(angle_bins=angle_bins, angular_velocity_bins=angular_velocity_bins, action_bins=torque_bins) 210 cached_P_filepath = os.path.join(cache_dir, cached_P_filepath) 211 212 if not os.path.exists(cache_dir): 213 os.makedirs(cache_dir) 214 215 if os.path.exists(cached_P_filepath): 216 with gzip.open(cached_P_filepath, 'rb') as f: 217 self.P = pickle.load(f) 218 else: 219 self.setup_transition_probabilities() 220 with gzip.open(cached_P_filepath, 'wb') as f: 221 pickle.dump(self.P, f)
238 def transform_cont_obs(self, cont_obs): 239 x = cont_obs[0] 240 y = cont_obs[1] 241 theta = np.arctan2(y, x) 242 theta = wrap(theta, -np.pi, np.pi) 243 theta_dot = cont_obs[2] 244 theta_dot = np.clip(theta_dot, -8+1e-6, 8-1e-6) 245 246 247 angle_idx = self.discretize_angle(theta) 248 angular_velocity_idx = self.discretize_angular_velocity(theta_dot) 249 250 return self.state_to_index(angle_idx, angular_velocity_idx)
255 def setup_transition_probabilities(self): 256 state_space_values = list(range(self.state_space)) 257 258 args = [ 259 ( 260 state, 261 self.angle_bin_edges, 262 self.angular_velocity_bin_edges, 263 self.torque_bin_edges, 264 self.dim_samples 265 ) 266 for state in state_space_values 267 ] 268 269 new_P = {} 270 271 args = [arg for arg in args if arg[0] not in new_P] 272 273 num_workers = self.n_workers 274 275 n_completed = len(new_P) 276 277 batch_size = 1000 278 279 with ProcessPoolExecutor(max_workers=num_workers) as executor: 280 for i in range(0, len(args), batch_size): 281 batch = args[i:i + batch_size] 282 futures = [executor.submit(setup_transition_probabilities_for_state, arg) for arg in batch] 283 for future in as_completed(futures): 284 n_completed += 1 285 try: 286 state, P_state = future.result() 287 new_P[state] = P_state 288 if n_completed % 100 == 0: 289 print(f'Completed {n_completed}/{self.state_space}') 290 except Exception as e: 291 print(f"Error in future: {e}") 292 print('task failed') 293 294 self.P = new_P