envs.pendulum_discretized

Author: Aleksandr Spiridonov BSD 3-Clause License

  1"""
  2Author: Aleksandr Spiridonov
  3BSD 3-Clause License
  4"""
  5import numpy as np
  6from bettermdptools.envs.binning import generate_bin_edges
  7from gymnasium.envs.classic_control.pendulum import angle_normalize
  8from gymnasium.envs.classic_control.acrobot import wrap
  9import os
 10import gzip
 11import pickle
 12from concurrent.futures import ProcessPoolExecutor, as_completed
 13
 14
 15CACHED_P_PATH_FORMAT = 'cached_P_discretized_pendulum_{angle_bins}_{angular_velocity_bins}_{action_bins}.pkl.gz'
 16
 17def index_to_state(
 18        index,
 19        angle_bins,
 20        angular_velocity_bins
 21):
 22    angle_idx = index // angular_velocity_bins
 23    angular_velocity_idx = index % angular_velocity_bins
 24    return angle_idx, angular_velocity_idx
 25
 26def index_to_continous_state(
 27        index,
 28        angle_bin_edges,
 29        angular_velocity_bin_edges
 30):
 31    angle_idx, angular_velocity_idx = index_to_state(index, len(angle_bin_edges) - 1, len(angular_velocity_bin_edges) - 1)
 32    angle = (angle_bin_edges[angle_idx] + angle_bin_edges[angle_idx + 1]) / 2.0
 33    angular_velocity = (angular_velocity_bin_edges[angular_velocity_idx] + angular_velocity_bin_edges[angular_velocity_idx + 1]) / 2.0
 34    return angle, angular_velocity
 35
 36def state_to_index(
 37        angle_idx,
 38        angular_velocity_idx,
 39        angular_velocity_bins
 40):
 41    return angle_idx * angular_velocity_bins + angular_velocity_idx
 42
 43def get_torque_value(torque_bin_edges, action):
 44    return (torque_bin_edges[action] + torque_bin_edges[action + 1]) / 2.0
 45
 46def compute_next_probable_states(
 47        angle_idx,
 48        angular_velocity_idx,
 49        action,
 50        angle_bin_edges,
 51        angular_velocity_bin_edges,
 52        torque_bin_edges,
 53        num_samples=11,
 54        g=10.0,
 55        l=1.0,
 56        m=1.0,
 57        dt=0.05
 58):
 59    angle_low, angle_high = angle_bin_edges[angle_idx], angle_bin_edges[angle_idx + 1]
 60    angular_velocity_low, angular_velocity_high = angular_velocity_bin_edges[angular_velocity_idx], angular_velocity_bin_edges[angular_velocity_idx + 1]
 61    torque = get_torque_value(torque_bin_edges, action)
 62
 63    min_angular_velocity = angular_velocity_bin_edges[0]
 64    max_angular_velocity = angular_velocity_bin_edges[-1]
 65
 66    angle_samples = np.linspace(angle_low, angle_high, num_samples)
 67    angle_samples = angle_samples[1:-1] # Exclude the bin edges
 68    angular_velocity_samples = np.linspace(angular_velocity_low, angular_velocity_high, num_samples)
 69    angular_velocity_samples = angular_velocity_samples[1:-1]
 70
 71    angle_bins = len(angle_bin_edges) - 1
 72    angular_velocity_bins = len(angular_velocity_bin_edges) - 1
 73
 74    next_states_and_rewards = {}
 75
 76    for angle in angle_samples:
 77        for angular_velocity in angular_velocity_samples:
 78            costs = angle_normalize(angle) ** 2 + 0.1 * angular_velocity ** 2 + 0.001 * (torque ** 2)
 79
 80            new_angular_velocity = angular_velocity + (3 * g / (2 * l) * np.sin(angle) + 3.0 / (m * l ** 2) * torque) * dt
 81            new_angular_velocity = np.clip(new_angular_velocity, min_angular_velocity+1e-6, max_angular_velocity-1e-6)
 82
 83            new_angle = angle + new_angular_velocity * dt
 84            new_angle = wrap(new_angle, -np.pi, np.pi)
 85
 86            new_angle_idx = np.digitize(new_angle, angle_bin_edges) - 1
 87            new_angular_velocity_idx = np.digitize(new_angular_velocity, angular_velocity_bin_edges) - 1
 88
 89            new_state = state_to_index(
 90                new_angle_idx, 
 91                new_angular_velocity_idx, 
 92                angular_velocity_bins)
 93            
 94            if new_state < 0 or new_state >= angle_bins * angular_velocity_bins:
 95                raise ValueError(f"Invalid state index: {new_state}")
 96            
 97            terminated = False
 98
 99            summary = (new_state, -costs, terminated)
100
101            if new_state not in next_states_and_rewards:
102                next_states_and_rewards[new_state] = []
103            next_states_and_rewards[new_state].append(summary)
104
105    n_total = len(angle_samples) * len(angular_velocity_samples)
106
107    results = []
108
109    for new_state, summaries in next_states_and_rewards.items():
110        n = len(summaries)
111        prob = n / n_total
112        ave_reward = sum(r for _, r, _ in summaries) / n
113        terminated = False
114        results.append((prob, new_state, ave_reward, terminated))
115
116    return results
117
118def setup_transition_probabilities_for_state(args):
119    state, angle_bin_edges, angular_velocity_bin_edges, torque_bin_edges, dim_samples = args
120    angle_bins = len(angle_bin_edges) - 1
121    angular_velocity_bins = len(angular_velocity_bin_edges) - 1
122    action_bins = len(torque_bin_edges) - 1
123
124    P_state = {action: [] for action in range(action_bins)}
125
126    angle_idx, angular_velocity_idx = index_to_state(state, angle_bins, angular_velocity_bins)
127
128    for action in range(action_bins):
129        P_state[action] = compute_next_probable_states(
130            angle_idx,
131            angular_velocity_idx,
132            action,
133            angle_bin_edges,
134            angular_velocity_bin_edges,
135            torque_bin_edges,
136            num_samples=dim_samples
137        )
138
139    try:
140        return (state, P_state)
141    except Exception as e:
142        print(f"Error in state {state}: {e}")
143        return None
144
145class DiscretizedPendulum:
146    """
147    Initialize the DiscretizedPendulum environment.
148    Parameters:
149    -----------
150    angle_bins : int
151        Number of bins to discretize the angle.
152    angular_velocity_bins : int
153        Number of bins to discretize the angular velocity.
154    torque_bins : int, optional (default=11)
155        Number of bins to discretize the torque.
156    n_workers : int, optional (default=4)
157        Number of worker processes to use for setting up transition probabilities.
158    cache_dir : str, optional (default='./cached')
159        Directory to cache the transition probabilities.
160    dim_samples : int, optional (default=11)
161        Number of samples to use for each dimension when setting up transition probabilities.
162    Attributes:
163    -----------
164    angle_bins : int
165        Number of bins to discretize the angle. Must be odd.
166    angular_velocity_bins : int
167        Number of bins to discretize the angular velocity. Must be odd.
168    dim_samples : int
169        Number of samples to use for each dimension when setting up transition probabilities.
170    angle_bin_edges : numpy.ndarray
171        Edges of the bins for discretizing the angle.
172    angular_velocity_bin_edges : numpy.ndarray
173        Edges of the bins for discretizing the angular velocity.
174    torque_bin_edges : numpy.ndarray
175        Edges of the bins for discretizing the torque.
176    state_space : int
177        Total number of discrete states.
178    action_space : int
179        Total number of discrete actions.
180    P : dict
181        Transition probability matrix.
182    n_workers : int
183        Number of worker processes to use for setting up transition probabilities.
184    """
185    def __init__(
186            self,
187            angle_bins,
188            angular_velocity_bins,
189            torque_bins=11,
190            n_workers=4,
191            cache_dir='./cached',
192            dim_samples=11
193    ):
194        self.angle_bins = angle_bins
195        self.angular_velocity_bins = angular_velocity_bins
196        self.dim_samples = dim_samples
197        self.angle_bin_edges = generate_bin_edges(np.pi, angle_bins, 3, center=True)
198        self.angular_velocity_bin_edges = generate_bin_edges(8, angular_velocity_bins, 3, center=False)
199        self.torque_bin_edges = generate_bin_edges(2, torque_bins, 3, center=False)
200
201        self.state_space = angle_bins * angular_velocity_bins
202        self.action_space = torque_bins
203
204        self.P = {state: {action: [] for action in range(torque_bins)} for state in range(self.state_space)}
205
206        self.n_workers = n_workers
207
208        cached_P_filepath = CACHED_P_PATH_FORMAT.format(angle_bins=angle_bins, angular_velocity_bins=angular_velocity_bins, action_bins=torque_bins)
209        cached_P_filepath = os.path.join(cache_dir, cached_P_filepath)
210
211        if not os.path.exists(cache_dir):
212            os.makedirs(cache_dir)
213
214        if os.path.exists(cached_P_filepath):
215            with gzip.open(cached_P_filepath, 'rb') as f:
216                self.P = pickle.load(f)
217        else:
218            self.setup_transition_probabilities()
219            with gzip.open(cached_P_filepath, 'wb') as f:
220                pickle.dump(self.P, f)
221
222    def discretize_angle(self, angle):
223        return np.digitize(angle, self.angle_bin_edges) - 1
224    
225    def discretize_angular_velocity(self, angular_velocity):
226        return np.digitize(angular_velocity, self.angular_velocity_bin_edges) - 1
227    
228    def index_to_state(self, index):
229        return index_to_state(index, self.angle_bins, self.angular_velocity_bins)
230    
231    def state_to_index(self, angle_idx, angular_velocity_idx):
232        idx = state_to_index(angle_idx, angular_velocity_idx, self.angular_velocity_bins)
233        if idx < 0 or idx >= self.state_space:
234            raise ValueError(f"Invalid state index: {idx}")
235        return idx
236
237    def transform_cont_obs(self, cont_obs):
238        x = cont_obs[0]
239        y = cont_obs[1]
240        theta = np.arctan2(y, x)
241        theta = wrap(theta, -np.pi, np.pi)
242        theta_dot = cont_obs[2]
243        theta_dot = np.clip(theta_dot, -8+1e-6, 8-1e-6)
244
245
246        angle_idx = self.discretize_angle(theta)
247        angular_velocity_idx = self.discretize_angular_velocity(theta_dot)
248
249        return self.state_to_index(angle_idx, angular_velocity_idx)
250
251    def get_action_value(self, action):
252        return get_torque_value(self.torque_bin_edges, action)
253
254    def setup_transition_probabilities(self):
255        state_space_values = list(range(self.state_space))
256
257        args = [
258            (
259                state,
260                self.angle_bin_edges,
261                self.angular_velocity_bin_edges,
262                self.torque_bin_edges,
263                self.dim_samples
264            )
265            for state in state_space_values
266        ]
267
268        new_P = {}
269
270        args = [arg for arg in args if arg[0] not in new_P]
271
272        num_workers = self.n_workers
273
274        n_completed = len(new_P)
275
276        batch_size = 1000
277
278        with ProcessPoolExecutor(max_workers=num_workers) as executor:
279            for i in range(0, len(args), batch_size):
280                batch = args[i:i + batch_size]
281                futures = [executor.submit(setup_transition_probabilities_for_state, arg) for arg in batch]
282                for future in as_completed(futures):
283                    n_completed += 1
284                    try:
285                        state, P_state = future.result()
286                        new_P[state] = P_state
287                        if n_completed % 100 == 0:
288                            print(f'Completed {n_completed}/{self.state_space}')
289                    except Exception as e:
290                        print(f"Error in future: {e}")
291                        print('task failed')
292
293        self.P = new_P
294        
295if __name__ == '__main__':
296    n_bins = 31
297    angle_bins = n_bins
298    angular_velocity_bins = n_bins
299
300    discretized_pendulum = DiscretizedPendulum(
301        angle_bins=angle_bins,
302        angular_velocity_bins=angular_velocity_bins
303    )
304
305    angle = np.pi / 2
306    angular_velocity = 3
307
308    obs = np.array([np.cos(angle), np.sin(angle), angular_velocity])
309
310    state = discretized_pendulum.transform_cont_obs(obs)
311    print(f'Discretized state index: {state}')
312
313    for action in range(discretized_pendulum.action_space):
314        transitions = discretized_pendulum.P[state][action]
315        for prob, next_state, reward, terminated in transitions:
316            print(f'Action: {action}, Probability: {prob}, Next state: {next_state}, Reward: {reward}, Terminated: {terminated}')
CACHED_P_PATH_FORMAT = 'cached_P_discretized_pendulum_{angle_bins}_{angular_velocity_bins}_{action_bins}.pkl.gz'
def index_to_state(index, angle_bins, angular_velocity_bins):
18def index_to_state(
19        index,
20        angle_bins,
21        angular_velocity_bins
22):
23    angle_idx = index // angular_velocity_bins
24    angular_velocity_idx = index % angular_velocity_bins
25    return angle_idx, angular_velocity_idx
def index_to_continous_state(index, angle_bin_edges, angular_velocity_bin_edges):
27def index_to_continous_state(
28        index,
29        angle_bin_edges,
30        angular_velocity_bin_edges
31):
32    angle_idx, angular_velocity_idx = index_to_state(index, len(angle_bin_edges) - 1, len(angular_velocity_bin_edges) - 1)
33    angle = (angle_bin_edges[angle_idx] + angle_bin_edges[angle_idx + 1]) / 2.0
34    angular_velocity = (angular_velocity_bin_edges[angular_velocity_idx] + angular_velocity_bin_edges[angular_velocity_idx + 1]) / 2.0
35    return angle, angular_velocity
def state_to_index(angle_idx, angular_velocity_idx, angular_velocity_bins):
37def state_to_index(
38        angle_idx,
39        angular_velocity_idx,
40        angular_velocity_bins
41):
42    return angle_idx * angular_velocity_bins + angular_velocity_idx
def get_torque_value(torque_bin_edges, action):
44def get_torque_value(torque_bin_edges, action):
45    return (torque_bin_edges[action] + torque_bin_edges[action + 1]) / 2.0
def compute_next_probable_states( angle_idx, angular_velocity_idx, action, angle_bin_edges, angular_velocity_bin_edges, torque_bin_edges, num_samples=11, g=10.0, l=1.0, m=1.0, dt=0.05):
 47def compute_next_probable_states(
 48        angle_idx,
 49        angular_velocity_idx,
 50        action,
 51        angle_bin_edges,
 52        angular_velocity_bin_edges,
 53        torque_bin_edges,
 54        num_samples=11,
 55        g=10.0,
 56        l=1.0,
 57        m=1.0,
 58        dt=0.05
 59):
 60    angle_low, angle_high = angle_bin_edges[angle_idx], angle_bin_edges[angle_idx + 1]
 61    angular_velocity_low, angular_velocity_high = angular_velocity_bin_edges[angular_velocity_idx], angular_velocity_bin_edges[angular_velocity_idx + 1]
 62    torque = get_torque_value(torque_bin_edges, action)
 63
 64    min_angular_velocity = angular_velocity_bin_edges[0]
 65    max_angular_velocity = angular_velocity_bin_edges[-1]
 66
 67    angle_samples = np.linspace(angle_low, angle_high, num_samples)
 68    angle_samples = angle_samples[1:-1] # Exclude the bin edges
 69    angular_velocity_samples = np.linspace(angular_velocity_low, angular_velocity_high, num_samples)
 70    angular_velocity_samples = angular_velocity_samples[1:-1]
 71
 72    angle_bins = len(angle_bin_edges) - 1
 73    angular_velocity_bins = len(angular_velocity_bin_edges) - 1
 74
 75    next_states_and_rewards = {}
 76
 77    for angle in angle_samples:
 78        for angular_velocity in angular_velocity_samples:
 79            costs = angle_normalize(angle) ** 2 + 0.1 * angular_velocity ** 2 + 0.001 * (torque ** 2)
 80
 81            new_angular_velocity = angular_velocity + (3 * g / (2 * l) * np.sin(angle) + 3.0 / (m * l ** 2) * torque) * dt
 82            new_angular_velocity = np.clip(new_angular_velocity, min_angular_velocity+1e-6, max_angular_velocity-1e-6)
 83
 84            new_angle = angle + new_angular_velocity * dt
 85            new_angle = wrap(new_angle, -np.pi, np.pi)
 86
 87            new_angle_idx = np.digitize(new_angle, angle_bin_edges) - 1
 88            new_angular_velocity_idx = np.digitize(new_angular_velocity, angular_velocity_bin_edges) - 1
 89
 90            new_state = state_to_index(
 91                new_angle_idx, 
 92                new_angular_velocity_idx, 
 93                angular_velocity_bins)
 94            
 95            if new_state < 0 or new_state >= angle_bins * angular_velocity_bins:
 96                raise ValueError(f"Invalid state index: {new_state}")
 97            
 98            terminated = False
 99
100            summary = (new_state, -costs, terminated)
101
102            if new_state not in next_states_and_rewards:
103                next_states_and_rewards[new_state] = []
104            next_states_and_rewards[new_state].append(summary)
105
106    n_total = len(angle_samples) * len(angular_velocity_samples)
107
108    results = []
109
110    for new_state, summaries in next_states_and_rewards.items():
111        n = len(summaries)
112        prob = n / n_total
113        ave_reward = sum(r for _, r, _ in summaries) / n
114        terminated = False
115        results.append((prob, new_state, ave_reward, terminated))
116
117    return results
def setup_transition_probabilities_for_state(args):
119def setup_transition_probabilities_for_state(args):
120    state, angle_bin_edges, angular_velocity_bin_edges, torque_bin_edges, dim_samples = args
121    angle_bins = len(angle_bin_edges) - 1
122    angular_velocity_bins = len(angular_velocity_bin_edges) - 1
123    action_bins = len(torque_bin_edges) - 1
124
125    P_state = {action: [] for action in range(action_bins)}
126
127    angle_idx, angular_velocity_idx = index_to_state(state, angle_bins, angular_velocity_bins)
128
129    for action in range(action_bins):
130        P_state[action] = compute_next_probable_states(
131            angle_idx,
132            angular_velocity_idx,
133            action,
134            angle_bin_edges,
135            angular_velocity_bin_edges,
136            torque_bin_edges,
137            num_samples=dim_samples
138        )
139
140    try:
141        return (state, P_state)
142    except Exception as e:
143        print(f"Error in state {state}: {e}")
144        return None
class DiscretizedPendulum:
146class DiscretizedPendulum:
147    """
148    Initialize the DiscretizedPendulum environment.
149    Parameters:
150    -----------
151    angle_bins : int
152        Number of bins to discretize the angle.
153    angular_velocity_bins : int
154        Number of bins to discretize the angular velocity.
155    torque_bins : int, optional (default=11)
156        Number of bins to discretize the torque.
157    n_workers : int, optional (default=4)
158        Number of worker processes to use for setting up transition probabilities.
159    cache_dir : str, optional (default='./cached')
160        Directory to cache the transition probabilities.
161    dim_samples : int, optional (default=11)
162        Number of samples to use for each dimension when setting up transition probabilities.
163    Attributes:
164    -----------
165    angle_bins : int
166        Number of bins to discretize the angle. Must be odd.
167    angular_velocity_bins : int
168        Number of bins to discretize the angular velocity. Must be odd.
169    dim_samples : int
170        Number of samples to use for each dimension when setting up transition probabilities.
171    angle_bin_edges : numpy.ndarray
172        Edges of the bins for discretizing the angle.
173    angular_velocity_bin_edges : numpy.ndarray
174        Edges of the bins for discretizing the angular velocity.
175    torque_bin_edges : numpy.ndarray
176        Edges of the bins for discretizing the torque.
177    state_space : int
178        Total number of discrete states.
179    action_space : int
180        Total number of discrete actions.
181    P : dict
182        Transition probability matrix.
183    n_workers : int
184        Number of worker processes to use for setting up transition probabilities.
185    """
186    def __init__(
187            self,
188            angle_bins,
189            angular_velocity_bins,
190            torque_bins=11,
191            n_workers=4,
192            cache_dir='./cached',
193            dim_samples=11
194    ):
195        self.angle_bins = angle_bins
196        self.angular_velocity_bins = angular_velocity_bins
197        self.dim_samples = dim_samples
198        self.angle_bin_edges = generate_bin_edges(np.pi, angle_bins, 3, center=True)
199        self.angular_velocity_bin_edges = generate_bin_edges(8, angular_velocity_bins, 3, center=False)
200        self.torque_bin_edges = generate_bin_edges(2, torque_bins, 3, center=False)
201
202        self.state_space = angle_bins * angular_velocity_bins
203        self.action_space = torque_bins
204
205        self.P = {state: {action: [] for action in range(torque_bins)} for state in range(self.state_space)}
206
207        self.n_workers = n_workers
208
209        cached_P_filepath = CACHED_P_PATH_FORMAT.format(angle_bins=angle_bins, angular_velocity_bins=angular_velocity_bins, action_bins=torque_bins)
210        cached_P_filepath = os.path.join(cache_dir, cached_P_filepath)
211
212        if not os.path.exists(cache_dir):
213            os.makedirs(cache_dir)
214
215        if os.path.exists(cached_P_filepath):
216            with gzip.open(cached_P_filepath, 'rb') as f:
217                self.P = pickle.load(f)
218        else:
219            self.setup_transition_probabilities()
220            with gzip.open(cached_P_filepath, 'wb') as f:
221                pickle.dump(self.P, f)
222
223    def discretize_angle(self, angle):
224        return np.digitize(angle, self.angle_bin_edges) - 1
225    
226    def discretize_angular_velocity(self, angular_velocity):
227        return np.digitize(angular_velocity, self.angular_velocity_bin_edges) - 1
228    
229    def index_to_state(self, index):
230        return index_to_state(index, self.angle_bins, self.angular_velocity_bins)
231    
232    def state_to_index(self, angle_idx, angular_velocity_idx):
233        idx = state_to_index(angle_idx, angular_velocity_idx, self.angular_velocity_bins)
234        if idx < 0 or idx >= self.state_space:
235            raise ValueError(f"Invalid state index: {idx}")
236        return idx
237
238    def transform_cont_obs(self, cont_obs):
239        x = cont_obs[0]
240        y = cont_obs[1]
241        theta = np.arctan2(y, x)
242        theta = wrap(theta, -np.pi, np.pi)
243        theta_dot = cont_obs[2]
244        theta_dot = np.clip(theta_dot, -8+1e-6, 8-1e-6)
245
246
247        angle_idx = self.discretize_angle(theta)
248        angular_velocity_idx = self.discretize_angular_velocity(theta_dot)
249
250        return self.state_to_index(angle_idx, angular_velocity_idx)
251
252    def get_action_value(self, action):
253        return get_torque_value(self.torque_bin_edges, action)
254
255    def setup_transition_probabilities(self):
256        state_space_values = list(range(self.state_space))
257
258        args = [
259            (
260                state,
261                self.angle_bin_edges,
262                self.angular_velocity_bin_edges,
263                self.torque_bin_edges,
264                self.dim_samples
265            )
266            for state in state_space_values
267        ]
268
269        new_P = {}
270
271        args = [arg for arg in args if arg[0] not in new_P]
272
273        num_workers = self.n_workers
274
275        n_completed = len(new_P)
276
277        batch_size = 1000
278
279        with ProcessPoolExecutor(max_workers=num_workers) as executor:
280            for i in range(0, len(args), batch_size):
281                batch = args[i:i + batch_size]
282                futures = [executor.submit(setup_transition_probabilities_for_state, arg) for arg in batch]
283                for future in as_completed(futures):
284                    n_completed += 1
285                    try:
286                        state, P_state = future.result()
287                        new_P[state] = P_state
288                        if n_completed % 100 == 0:
289                            print(f'Completed {n_completed}/{self.state_space}')
290                    except Exception as e:
291                        print(f"Error in future: {e}")
292                        print('task failed')
293
294        self.P = new_P

Initialize the DiscretizedPendulum environment.

Parameters:

angle_bins : int Number of bins to discretize the angle. angular_velocity_bins : int Number of bins to discretize the angular velocity. torque_bins : int, optional (default=11) Number of bins to discretize the torque. n_workers : int, optional (default=4) Number of worker processes to use for setting up transition probabilities. cache_dir : str, optional (default='./cached') Directory to cache the transition probabilities. dim_samples : int, optional (default=11) Number of samples to use for each dimension when setting up transition probabilities.

Attributes:

angle_bins : int Number of bins to discretize the angle. Must be odd. angular_velocity_bins : int Number of bins to discretize the angular velocity. Must be odd. dim_samples : int Number of samples to use for each dimension when setting up transition probabilities. angle_bin_edges : numpy.ndarray Edges of the bins for discretizing the angle. angular_velocity_bin_edges : numpy.ndarray Edges of the bins for discretizing the angular velocity. torque_bin_edges : numpy.ndarray Edges of the bins for discretizing the torque. state_space : int Total number of discrete states. action_space : int Total number of discrete actions. P : dict Transition probability matrix. n_workers : int Number of worker processes to use for setting up transition probabilities.

DiscretizedPendulum( angle_bins, angular_velocity_bins, torque_bins=11, n_workers=4, cache_dir='./cached', dim_samples=11)
186    def __init__(
187            self,
188            angle_bins,
189            angular_velocity_bins,
190            torque_bins=11,
191            n_workers=4,
192            cache_dir='./cached',
193            dim_samples=11
194    ):
195        self.angle_bins = angle_bins
196        self.angular_velocity_bins = angular_velocity_bins
197        self.dim_samples = dim_samples
198        self.angle_bin_edges = generate_bin_edges(np.pi, angle_bins, 3, center=True)
199        self.angular_velocity_bin_edges = generate_bin_edges(8, angular_velocity_bins, 3, center=False)
200        self.torque_bin_edges = generate_bin_edges(2, torque_bins, 3, center=False)
201
202        self.state_space = angle_bins * angular_velocity_bins
203        self.action_space = torque_bins
204
205        self.P = {state: {action: [] for action in range(torque_bins)} for state in range(self.state_space)}
206
207        self.n_workers = n_workers
208
209        cached_P_filepath = CACHED_P_PATH_FORMAT.format(angle_bins=angle_bins, angular_velocity_bins=angular_velocity_bins, action_bins=torque_bins)
210        cached_P_filepath = os.path.join(cache_dir, cached_P_filepath)
211
212        if not os.path.exists(cache_dir):
213            os.makedirs(cache_dir)
214
215        if os.path.exists(cached_P_filepath):
216            with gzip.open(cached_P_filepath, 'rb') as f:
217                self.P = pickle.load(f)
218        else:
219            self.setup_transition_probabilities()
220            with gzip.open(cached_P_filepath, 'wb') as f:
221                pickle.dump(self.P, f)
angle_bins
angular_velocity_bins
dim_samples
angle_bin_edges
angular_velocity_bin_edges
torque_bin_edges
state_space
action_space
P
n_workers
def discretize_angle(self, angle):
223    def discretize_angle(self, angle):
224        return np.digitize(angle, self.angle_bin_edges) - 1
def discretize_angular_velocity(self, angular_velocity):
226    def discretize_angular_velocity(self, angular_velocity):
227        return np.digitize(angular_velocity, self.angular_velocity_bin_edges) - 1
def index_to_state(self, index):
229    def index_to_state(self, index):
230        return index_to_state(index, self.angle_bins, self.angular_velocity_bins)
def state_to_index(self, angle_idx, angular_velocity_idx):
232    def state_to_index(self, angle_idx, angular_velocity_idx):
233        idx = state_to_index(angle_idx, angular_velocity_idx, self.angular_velocity_bins)
234        if idx < 0 or idx >= self.state_space:
235            raise ValueError(f"Invalid state index: {idx}")
236        return idx
def transform_cont_obs(self, cont_obs):
238    def transform_cont_obs(self, cont_obs):
239        x = cont_obs[0]
240        y = cont_obs[1]
241        theta = np.arctan2(y, x)
242        theta = wrap(theta, -np.pi, np.pi)
243        theta_dot = cont_obs[2]
244        theta_dot = np.clip(theta_dot, -8+1e-6, 8-1e-6)
245
246
247        angle_idx = self.discretize_angle(theta)
248        angular_velocity_idx = self.discretize_angular_velocity(theta_dot)
249
250        return self.state_to_index(angle_idx, angular_velocity_idx)
def get_action_value(self, action):
252    def get_action_value(self, action):
253        return get_torque_value(self.torque_bin_edges, action)
def setup_transition_probabilities(self):
255    def setup_transition_probabilities(self):
256        state_space_values = list(range(self.state_space))
257
258        args = [
259            (
260                state,
261                self.angle_bin_edges,
262                self.angular_velocity_bin_edges,
263                self.torque_bin_edges,
264                self.dim_samples
265            )
266            for state in state_space_values
267        ]
268
269        new_P = {}
270
271        args = [arg for arg in args if arg[0] not in new_P]
272
273        num_workers = self.n_workers
274
275        n_completed = len(new_P)
276
277        batch_size = 1000
278
279        with ProcessPoolExecutor(max_workers=num_workers) as executor:
280            for i in range(0, len(args), batch_size):
281                batch = args[i:i + batch_size]
282                futures = [executor.submit(setup_transition_probabilities_for_state, arg) for arg in batch]
283                for future in as_completed(futures):
284                    n_completed += 1
285                    try:
286                        state, P_state = future.result()
287                        new_P[state] = P_state
288                        if n_completed % 100 == 0:
289                            print(f'Completed {n_completed}/{self.state_space}')
290                    except Exception as e:
291                        print(f"Error in future: {e}")
292                        print('task failed')
293
294        self.P = new_P