---
title: DDPG
keywords: fastai
sidebar: home_sidebar
summary: "An implementation of DDPG, Deep Deterministic Policy Gradient."
description: "An implementation of DDPG, Deep Deterministic Policy Gradient."
nb_path: "nbs/rl/agents/ddpg.ipynb"
---
class Config:
    """Static settings bundle handed to ``DDPGAgent`` through its ``config`` argument.

    Every value is a plain class attribute. The sizes in the last group are
    computed from earlier attributes when the class body executes, so they
    do not follow later changes made on an instance.
    """

    # --- runtime -----------------------------------------------------------
    device = torch.device("cpu")

    # --- problem dimensions ------------------------------------------------
    user_num = 5
    item_num = 5
    history_length = 5
    embedding_size = 32

    # --- network shapes ----------------------------------------------------
    actor_hidden_sizes = (128, 64)
    critic_hidden_sizes = (32, 16)

    # --- learning dynamics -------------------------------------------------
    # NOTE(review): presumably the target-network soft-update rate and the
    # reward discount factor (usual DDPG naming) — confirm in DDPGAgent.
    tau = 1e-3
    gamma = 0.9
    batch_size = 64
    buffer_size = 100

    # --- optimiser settings, one set per parameter group ---------------------
    embedding_learning_rate = 1e-4
    actor_learning_rate = 1e-4
    critic_learning_rate = 1e-4
    embedding_weight_decay = 1e-6
    actor_weight_decay = 1e-6
    critic_weight_decay = 1e-6

    # --- derived sizes -----------------------------------------------------
    # state = history_length entries plus one extra slot.
    state_size = history_length + 1
    action_size = 1
    embedded_state_size = state_size * embedding_size
    embedded_action_size = action_size * embedding_size
# Example wiring: build the settings object, the exploration-noise source and
# the agent itself.
config = Config()

# Size the noise from the config instead of repeating the literal 32, so the
# two cannot drift apart if action_size or embedding_size is changed.
noise = OUNoise(
    embedded_action_size=config.embedded_action_size,
    ou_mu=0.0,
    ou_theta=0.15,
    ou_sigma=0.2,
    ou_epsilon=1.0,
)

# Maps a group id (string key) to the list of its member ids.
# NOTE(review): member ids go up to 5 while Config.user_num is 5 — confirm
# whether ids are 0- or 1-based against how DDPGAgent indexes users.
group2members_dict = {'0': [1, 2, 3], '1': [1, 4, 5]}

agent = DDPGAgent(
    config=config,
    noise=noise,
    group2members_dict=group2members_dict,
    verbose=True,
)