Source code for tensortrade.agents.rl.tensorflow_agent

import pandas as pd
import tensorflow as tf

from typing import Union, Callable, List

from tensortrade.environments.trading_environment import TradingEnvironment
from tensortrade.features.feature_pipeline import FeaturePipeline
from tensortrade.agents import TradingAgent


""" [WIP] """


class TensorflowAgent(TradingAgent):
    """A trading agent capable of self-tuning, training, and evaluating with the TensorFlow 2 `agents` API."""
    def __init__(self, env: TradingEnvironment, feature_pipeline: FeaturePipeline):
        """
        Args:
            env: A `TradingEnvironment` instance for the agent to trade within.
            feature_pipeline: A `FeaturePipeline` instance of feature transformations.
        """
        super().__init__(env=env, feature_pipeline=feature_pipeline)
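    # A usage sketch (not part of the original source): constructing the agent
    # only wires up the environment and feature pipeline; how `environment` and
    # `pipeline` themselves are built is assumed to happen elsewhere with the
    # usual TensorTrade components.
    #
    #     agent = TensorflowAgent(env=environment, feature_pipeline=pipeline)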
    def tune(self,
             steps_per_train: int,
             steps_per_test: int,
             step_cb: Callable[[pd.DataFrame], bool]) -> pd.DataFrame:
        pass
    def train(self,
              steps: int,
              callback: Callable[[pd.DataFrame], bool]) -> pd.DataFrame:
        # train_summary_writer = tf.compat.v2.summary.create_file_writer(
        #     train_dir, flush_millis=summaries_flush_secs * 1000)
        # train_summary_writer.set_as_default()

        # eval_summary_writer = tf.compat.v2.summary.create_file_writer(
        #     eval_dir, flush_millis=summaries_flush_secs * 1000)
        # eval_metrics = [
        #     tf_metrics.AverageReturnMetric(buffer_size=num_eval_episodes),
        #     tf_metrics.AverageEpisodeLengthMetric(buffer_size=num_eval_episodes)
        # ]

        # global_step = tf.compat.v1.train.get_or_create_global_step()
        # with tf.compat.v2.summary.record_if(
        #         lambda: tf.math.equal(global_step % summary_interval, 0)):
        #     tf.compat.v1.set_random_seed(random_seed)
        #     eval_tf_env = tf_py_environment.TFPyEnvironment(env_load_fn(env_name))
        #     tf_env = tf_py_environment.TFPyEnvironment(
        #         parallel_py_environment.ParallelPyEnvironment(
        #             [lambda: env_load_fn(env_name)] * num_parallel_environments))
        #     optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)

        #     if use_rnns:
        #         actor_net = actor_distribution_rnn_network.ActorDistributionRnnNetwork(
        #             tf_env.observation_spec(),
        #             tf_env.action_spec(),
        #             input_fc_layer_params=actor_fc_layers,
        #             output_fc_layer_params=None)
        #         value_net = value_rnn_network.ValueRnnNetwork(
        #             tf_env.observation_spec(),
        #             input_fc_layer_params=value_fc_layers,
        #             output_fc_layer_params=None)

        #     tf_agent = ppo_agent.PPOAgent(
        #         tf_env.time_step_spec(),
        #         tf_env.action_spec(),
        #         optimizer,
        #         actor_net=actor_net,
        #         value_net=value_net,
        #         num_epochs=num_epochs,
        #         debug_summaries=debug_summaries,
        #         summarize_grads_and_vars=summarize_grads_and_vars,
        #         train_step_counter=global_step)
        #     tf_agent.initialize()

        #     environment_steps_metric = tf_metrics.EnvironmentSteps()
        #     step_metrics = [
        #         tf_metrics.NumberOfEpisodes(),
        #         environment_steps_metric,
        #     ]
        #     train_metrics = step_metrics + [
        #         tf_metrics.AverageReturnMetric(
        #             batch_size=num_parallel_environments),
        #         tf_metrics.AverageEpisodeLengthMetric(
        #             batch_size=num_parallel_environments),
        #     ]

        #     eval_policy = tf_agent.policy
        #     collect_policy = tf_agent.collect_policy

        #     replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
        #         tf_agent.collect_data_spec,
        #         batch_size=num_parallel_environments,
        #         max_length=replay_buffer_capacity)

        #     collect_driver = dynamic_episode_driver.DynamicEpisodeDriver(
        #         tf_env,
        #         collect_policy,
        #         observers=[replay_buffer.add_batch] + train_metrics,
        #         num_episodes=collect_episodes_per_iteration)

        #     def train_step():
        #         trajectories = replay_buffer.gather_all()
        #         return tf_agent.train(experience=trajectories)

        #     collect_time = 0
        #     train_time = 0
        #     timed_at_step = global_step.numpy()

        #     while environment_steps_metric.result() < num_environment_steps:
        #         global_step_val = global_step.numpy()

        #         start_time = time.time()
        #         collect_driver.run()
        #         collect_time += time.time() - start_time

        #         start_time = time.time()
        #         total_loss, _ = train_step()
        #         replay_buffer.clear()
        #         train_time += time.time() - start_time

        #         for train_metric in train_metrics:
        #             train_metric.tf_summaries(train_step=global_step, step_metrics=step_metrics)

        #         if global_step_val % log_interval == 0:
        #             logging.info('step = %d, loss = %f', global_step_val, total_loss)
        #             steps_per_sec = (
        #                 (global_step_val - timed_at_step) / (collect_time + train_time))
        #             logging.info('%.3f steps/sec', steps_per_sec)
        #             logging.info('collect_time = {}, train_time = {}'.format(
        #                 collect_time, train_time))

        #             with tf.compat.v2.summary.record_if(True):
        #                 tf.compat.v2.summary.scalar(
        #                     name='global_steps_per_sec', data=steps_per_sec, step=global_step)

        #             timed_at_step = global_step_val
        #             collect_time = 0
        #             train_time = 0

        pass
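    # A hedged sketch (not part of the original WIP code above): the commented
    # PPO pipeline loads environments via `env_load_fn(env_name)`. For this
    # agent, the `TradingEnvironment` passed to `__init__` could instead be
    # wrapped for TF-Agents, assuming it exposes the standard `gym.Env`
    # interface:
    #
    #     from tf_agents.environments import gym_wrapper, tf_py_environment
    #
    #     tf_env = tf_py_environment.TFPyEnvironment(
    #         gym_wrapper.GymWrapper(self._env))
    #
    # The `self._env` attribute name is an assumption about how `TradingAgent`
    # stores the environment; only the wrapping pattern itself is the point.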
    def evaluate(self,
                 steps: int,
                 callback: Callable[[pd.DataFrame], bool]) -> pd.DataFrame:
        # metric_utils.eager_compute(
        #     eval_metrics,
        #     eval_tf_env,
        #     eval_policy,
        #     num_episodes=num_eval_episodes,
        #     train_step=global_step,
        #     summary_writer=eval_summary_writer,
        #     summary_prefix='Metrics',
        # )
        pass
    def get_action(self, observation: pd.DataFrame) -> Union[float, List[float]]:
        pass
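
# A hedged sketch (an assumption, not the library's implementation) of how
# `get_action` could map a single observation through a trained TF-Agents
# policy; `self._policy` is a hypothetical attribute that `train` would need
# to set once it is implemented:
#
#     from tf_agents.trajectories import time_step as ts
#
#     def get_action(self, observation: pd.DataFrame) -> Union[float, List[float]]:
#         obs_tensor = tf.convert_to_tensor(observation.values, dtype=tf.float32)
#         time_step = ts.restart(obs_tensor)
#         action = self._policy.action(time_step).action.numpy()
#         return action.tolist() if action.ndim > 0 else float(action)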