8 #include "./util/serialization/basics.h" 13 #include <mapbox/variant_cast.hpp> 57 const std::shared_ptr<IDecider> &
decider =
nullptr,
67 Tree(std::string filename);
107 const bool &finalize =
true);
144 const size_t &n_threads,
const bool &complete_dfs =
true,
145 const std::vector<float> &weights = std::vector<float>());
161 Tree *
fit_dprov(std::shared_ptr<IDataProvider> data_provider,
162 const bool &complete_dfs =
true);
173 const std::function<
void(
void *)> &dptf =
nullptr)
const;
202 const bool &use_fast_prediction_if_available =
true,
204 const bool &for_forest =
false);
212 const int &num_threads = 1,
213 const bool &use_fast_prediction_if_available =
true);
220 const std::function<
void(
void *)> &dptf =
nullptr)
const {
228 const std::vector<
Data<Mat>> &leaf_results,
259 return decider->get_data_dim();
281 inline const std::vector<std::pair<id_t, id_t>>
get_tree()
const {
298 VLOG(9) <<
"Disabling fast prediction; freeing memory.";
310 void save(
const std::string &filename)
const;
314 stream <<
"forpy::Tree[depth " <<
self.get_depth() <<
"]";
321 template <
class Archive>
353 std::vector<std::pair<id_t, id_t>>
tree;
361 mu::variant<std::vector<std::tuple<size_t, float, size_t, size_t>>,
362 std::vector<std::tuple<size_t, double, size_t, size_t>>,
363 std::vector<std::tuple<size_t, uint32_t, size_t, size_t>>,
364 std::vector<std::tuple<size_t, uint8_t, size_t, size_t>>>>
382 const uint &n_valid_features_to_use = 0,
383 const bool &autoscale_valid_features =
false,
385 const size_t &n_thresholds = 0,
386 const float &gain_threshold = 1E-7f);
388 inline std::unordered_map<std::string, mu::variant<uint, size_t, float, bool>>
394 const std::unordered_map<
395 std::string, mu::variant<uint, size_t, float, bool>> &
params) {
396 return std::make_shared<ClassificationTree>(
397 GetWithDefVar<uint>(
params,
"max_depth",
398 std::numeric_limits<uint>::max()),
399 GetWithDefVar<uint>(
params,
"min_samples_at_leaf", 1),
400 GetWithDefVar<uint>(
params,
"min_samples_at_node", 2),
401 GetWithDefVar<uint>(
params,
"n_valid_features_to_use", 0),
402 GetWithDefVar<bool>(
params,
"autoscale_valid_features",
false),
403 GetWithDefVar<uint>(
params,
"random_seed", 1),
404 GetWithDefVar<size_t>(
params,
"n_thresholds", 0),
405 GetWithDefVar<float>(
params,
"gain_threshold", 1E-7f));
410 stream <<
"forpy::ClassificationTree[depth " <<
self.get_depth() <<
"]";
415 std::unordered_map<std::string, mu::variant<uint, size_t, float, bool>>
418 template <
class Archive>
420 ar(cereal::make_nvp(
"base", cereal::base_class<Tree>(
this)),
432 const uint &n_valid_features_to_use = 0,
433 const bool &autoscale_valid_features =
false,
435 const float &gain_threshold = 1E-7f,
436 const bool &store_variance =
false,
437 const bool &summarize =
false);
439 inline std::unordered_map<std::string, mu::variant<uint, size_t, float, bool>>
445 const std::unordered_map<
446 std::string, mu::variant<uint, size_t, float, bool>> &
params) {
447 return std::make_shared<RegressionTree>(
448 GetWithDefVar<uint>(
params,
"max_depth",
449 std::numeric_limits<uint>::max()),
450 GetWithDefVar<uint>(
params,
"min_samples_at_leaf", 1),
451 GetWithDefVar<uint>(
params,
"min_samples_at_node", 2),
452 GetWithDefVar<uint>(
params,
"n_valid_features_to_use", 0),
453 GetWithDefVar<bool>(
params,
"autoscale_valid_features",
false),
454 GetWithDefVar<uint>(
params,
"random_seed", 1),
455 GetWithDefVar<size_t>(
params,
"n_thresholds", 0),
456 GetWithDefVar<float>(
params,
"gain_threshold", 1E-7f),
457 GetWithDefVar<bool>(
params,
"store_variance",
false),
458 GetWithDefVar<bool>(
params,
"summarize",
false));
463 stream <<
"forpy::RegressionTree[depth " <<
self.get_depth() <<
"]";
468 std::unordered_map<std::string, mu::variant<uint, size_t, float, bool>>
471 template <
class Archive>
473 ar(cereal::make_nvp(
"base", cereal::base_class<Tree>(
this)),
480 #endif // FORPY_TREE_H_
friend class cereal::access
unsigned int min_samples_at_node
std::unordered_map< std::string, mu::variant< uint, size_t, float, bool > > get_params(const bool &=false) const
std::unordered_map< std::string, mu::variant< uint, size_t, float, bool > > params
std::atomic< size_t > stored_in_leafs
void serialize(Archive &ar, const uint &)
size_t get_input_data_dimensions() const
The data dimension that is required by this tree.
void parallel_DFS(Desk *d, TodoMark &mark, IDataProvider *data_provider, const bool &finalize=true)
const std::vector< std::pair< id_t, id_t > > get_tree() const
Tree * fit_dprov(std::shared_ptr< IDataProvider > data_provider, const bool &complete_dfs=true)
The fitting function for a single tree.
friend class cereal::access
std::unique_ptr< mu::variant< std::vector< std::tuple< size_t, float, size_t, size_t > >, std::vector< std::tuple< size_t, double, size_t, size_t > >, std::vector< std::tuple< size_t, uint32_t, size_t, size_t > >, std::vector< std::tuple< size_t, uint8_t, size_t, size_t > > > > fast_tree
void disable_fast_prediction()
bool is_initialized_for_training
void enable_fast_prediction()
A data provider for the training of one tree.
std::vector< std::future< void > > futures
void DFS_and_store(Desk *d, TodoMark &mark, const IDataProvider *dprov, const ECompletionLevel &comp)
size_t id_t
Element id type.
void set_weight(const float &new_weight)
Sets the tree weight.
The main tree class for the forpy framework.
DISALLOW_COPY_AND_ASSIGN(ClassificationTree)
typename mu::variant< Empty, STOT< float >, STOT< double >, STOT< uint >, STOT< uint8_t > > Data
Storing a variant of the provided data container type.
void make_node(const IDataProvider *data_provider, Desk *d)
Handle the creation of one tree node.
std::shared_ptr< ILeaf > leaf_manager
friend std::ostream & operator<<(std::ostream &stream, const Tree &self)
DISALLOW_COPY_AND_ASSIGN(Tree)
friend class cereal::access
std::shared_ptr< IDecider > decider
id_t predict_leaf(const Data< MatCRef > &data, const id_t &start_node=0, const std::function< void(void *)> &dptf=nullptr) const
Get the leaf id of the leaf where the given data will arrive.
Stores the parameters for one marked tree node.
std::shared_ptr< const IDecider > get_decider() const
The classifier manager used by this tree.
Data< Mat > predict(const Data< MatCRef > &data_v, const int &num_threads=1, const bool &use_fast_prediction_if_available=true, const bool &predict_proba=false, const bool &for_forest=false)
Data< Mat > predict_proba(const Data< MatCRef > &data_v, const int &num_threads=1, const bool &use_fast_prediction_if_available=true)
Overload for consistency with the sklearn interface.
std::atomic< id_t > next_id
float get_weight() const
The tree weight.
Data< Mat > predict_leaf_result(const Data< MatCRef > &data, const id_t &start_node=0, const std::function< void(void *)> &dptf=nullptr) const
Get the data prediction result for the given data.
bool is_initialized() const
Whether the tree's fit method has been called and its DFS and BFS methods can now be used...
void serialize(Archive &ar, const uint &)
std::shared_ptr< RegressionTree > set_params(const std::unordered_map< std::string, mu::variant< uint, size_t, float, bool >> &params)
Tree(const uint &max_depth=std::numeric_limits< uint >::max(), const uint &min_samples_at_leaf=1, const uint &min_samples_at_node=2, const std::shared_ptr< IDecider > &decider=nullptr, const std::shared_ptr< ILeaf > &leaf_manager=nullptr, const uint &random_seed=1)
The standard constructor for the forpy trees.
RegressionTree(const std::string &filename)
size_t get_samples_stored() const
The number of samples stored in leafs.
std::shared_ptr< const ILeaf > get_leaf_manager() const
The leaf manager used by this tree.
std::shared_ptr< ClassificationTree > set_params(const std::unordered_map< std::string, mu::variant< uint, size_t, float, bool >> &params)
DISALLOW_COPY_AND_ASSIGN(RegressionTree)
std::vector< std::pair< id_t, id_t > > tree
std::unordered_map< std::string, mu::variant< uint, size_t, float, bool > > get_params(const bool &=false) const
Eigen::Matrix< DT, Eigen::Dynamic, 1, Eigen::ColMajor > Vec
Data< Mat > combine_leaf_results(const std::vector< Data< Mat >> &leaf_results, const Vec< float > &weights=Vec< float >(), const bool &predict_proba=false) const
void serialize(Archive &ar, const uint &)
bool operator==(Tree const &rhs) const
std::unordered_map< std::string, mu::variant< uint, size_t, float, bool > > params
ClassificationTree(const std::string &filename)
size_t get_n_nodes() const
The number of tree nodes.
ECompletionLevel
Specifies the completion level for one training step.
void DFS(const IDataProvider *data_provider, const ECompletionLevel &completion, Desk *d)
Do one DFS step with given completion level.
void save(const std::string &filename) const
Save the tree.
unsigned int uint
Convenience typedef for unsigned int.
friend std::ostream & operator<<(std::ostream &stream, const RegressionTree &self)
Tree * fit(const Data< MatCRef > &data_v, const Data< MatCRef > &annotation_v, const size_t &n_threads, const bool &complete_dfs=true, const std::vector< float > &weights=std::vector< float >())
Standard fitting function.
unsigned int min_samples_at_leaf
friend std::ostream & operator<<(std::ostream &stream, const ClassificationTree &self)