Source code for tensortrade.strategies.tensorforce_trading_strategy

# Copyright 2019 The TensorTrade Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os

from abc import ABCMeta, abstractmethod
from typing import Any, Callable, Dict, List, Union

import numpy as np
import pandas as pd

from tensorforce.agents import Agent
from tensorforce.environments import Environment
from tensorforce.execution import Runner

from tensortrade.strategies import TradingStrategy


class TensorforceTradingStrategy(TradingStrategy):
    """A trading strategy capable of self tuning, training, and evaluating with Tensorforce."""

    def __init__(self,
                 environment: 'TradingEnvironment',
                 agent_spec: Any,
                 save_best_agent: bool = False,
                 **kwargs):
        """
        Arguments:
            environment: A `TradingEnvironment` instance for the agent to trade within.
            agent_spec: A `Tensorforce` agent or agent specification, forwarded to `Agent.create`.
            save_best_agent (optional): Forwarded to the `Runner`, asking it to automatically
                save the best agent. Defaults to `False`.
            kwargs (optional): Optional keyword arguments to adjust the strategy. Only
                `max_episode_timesteps` is read here (defaults to `False`).
        """
        self._max_episode_timesteps = kwargs.get('max_episode_timesteps', False)

        # Remembered so that a runner rebuilt by `restore_agent` keeps the same setting.
        self._save_best_agent = save_best_agent

        # The trading environment is wrapped as a gym-style Tensorforce environment.
        self._environment = Environment.create(
            environment='gym',
            level=environment,
            max_episode_timesteps=self._max_episode_timesteps)

        self._agent = Agent.create(agent=agent_spec, environment=self._environment)

        self._runner = Runner(agent=self._agent,
                              environment=self._environment,
                              save_best_agent=self._save_best_agent)

    @property
    def agent(self) -> Agent:
        """A Tensorforce `Agent` instance that will learn the strategy."""
        return self._agent

    @property
    def max_episode_timesteps(self) -> int:
        """The maximum timesteps per episode."""
        return self._max_episode_timesteps

    @max_episode_timesteps.setter
    def max_episode_timesteps(self, max_episode_timesteps: int):
        # NOTE(review): only the stored value changes; the environment created in
        # `__init__` is not rebuilt here -- confirm whether callers expect that.
        self._max_episode_timesteps = max_episode_timesteps

    def restore_agent(self, directory: str, filename: str = None):
        """Deserialize the strategy's learning agent from a file.

        The runner is rebuilt around the restored agent, reusing the existing environment
        and the `save_best_agent` setting given at construction time.

        Arguments:
            directory: The `str` path of the directory the agent checkpoint is stored in.
            filename (optional): The `str` path of the file the agent specification is stored in.
                The `.json` file extension will be automatically appended if not provided.
        """
        self._agent = Agent.load(directory, filename=filename)

        self._runner = Runner(agent=self._agent,
                              environment=self._environment,
                              save_best_agent=self._save_best_agent)

    def save_agent(self, directory: str, filename: str = None, append_timestep: bool = False):
        """Serialize the learning agent to a file for restoring later.

        Arguments:
            directory: The `str` path of the directory the agent checkpoint is stored in.
            filename (optional): The `str` path of the file the agent specification is stored in.
                The `.json` file extension will be automatically appended if not provided.
            append_timestep: Whether the timestep should be appended to filename to prevent
                overwriting previous models. Defaults to `False`.
        """
        self._agent.save(directory=directory, filename=filename, append_timestep=append_timestep)

    def _finished_episode_cb(self, runner: Runner) -> bool:
        """Print a short progress report for `runner` and return `True`.

        Arguments:
            runner: The `Runner` whose episode counters and rewards are reported.

        Returns:
            Always `True`.
        """
        n_episodes = runner.episodes
        n_timesteps = runner.episode_timesteps
        avg_reward = np.mean(runner.episode_rewards)

        print("Finished episode {} after {} timesteps.".format(n_episodes, n_timesteps))
        print("Average episode reward: {}".format(avg_reward))

        return True

    def tune(self,
             steps: int = None,
             episodes: int = None,
             callback: Callable[[pd.DataFrame], bool] = None) -> pd.DataFrame:
        """Tuning is not implemented for this strategy.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def run(self,
            steps: int = None,
            episodes: int = None,
            evaluation: bool = False,
            episode_callback: Callable[[pd.DataFrame], bool] = None) -> pd.DataFrame:
        """Run the agent in the environment, print a summary, and return the performance.

        The runner is closed once the run completes.

        Arguments:
            steps (optional): Passed to the runner as `num_timesteps`.
            episodes (optional): Passed to the runner as `num_episodes`.
            evaluation (optional): Passed to the runner as `evaluation`. Defaults to `False`.
            episode_callback (optional): Passed unchanged to the runner as `callback`.
                NOTE(review): the annotation says the callback receives a `pd.DataFrame`,
                while `_finished_episode_cb` in this class takes a `Runner` -- confirm
                which argument the runner actually supplies.

        Returns:
            The `_performance` attribute of the exchange behind the wrapped environment.
        """
        self._runner.run(evaluation=evaluation,
                         num_timesteps=steps,
                         num_episodes=episodes,
                         callback=episode_callback)

        n_episodes = self._runner.episodes
        n_timesteps = self._runner.timesteps

        # With no finished episode there is nothing to average, so fall back to the
        # runner's `episode_reward` value.
        if n_episodes > 0:
            avg_reward = np.mean(self._runner.episode_rewards)
        else:
            avg_reward = self._runner.episode_reward

        print("Finished running strategy.")
        print("Total episodes: {} ({} timesteps).".format(n_episodes, n_timesteps))
        print("Average reward: {}.".format(avg_reward))

        self._runner.close()

        return self._environment.environment._exchange._performance