forpy  2
regressionleaf.h
Go to the documentation of this file.
1 /* Author: Christoph Lassner. */
2 #pragma once
3 #ifndef FORPY_LEAFS_REGRESSIONLEAF_H_
4 #define FORPY_LEAFS_REGRESSIONLEAF_H_
5 
6 #include "../global.h"
7 
8 #include "../util/serialization/basics.h"
9 
10 #include <numeric>
11 #include <utility>
12 #include <vector>
13 
14 #include "../data_providers/idataprovider.h"
15 #include "../impurities/ientropyfunction.h"
16 #include "../types.h"
17 #include "../util/checks.h"
18 #include "./ileaf.h"
19 
20 namespace forpy {
26 class RegressionLeaf : public ILeaf {
27  public:
28  inline explicit RegressionLeaf(const bool &store_variance = false,
29  const bool &summarize = false)
31  annot_dim(0),
34 
36  inline std::shared_ptr<ILeaf> create_duplicate() const {
38  return std::make_shared<RegressionLeaf>(store_variance);
39  }
40 
41  inline bool is_compatible_with(const IDataProvider &data_provider) {
42  this->annot_dim = data_provider.get_annot_vec_dim();
43  return true;
44  };
45  inline bool is_compatible_with(const IThreshOpt & /*threshopt*/) {
46  return true;
47  };
48  inline void transfer_or_run_check(ILeaf *other, IThreshOpt *thresh_opt,
49  IDataProvider *dprov) {
50  auto *cl_ot = dynamic_cast<RegressionLeaf *>(other);
51  if (cl_ot == nullptr) {
52  cl_ot->annot_dim = annot_dim;
53  } else {
54  other->is_compatible_with(*dprov);
55  other->is_compatible_with(*thresh_opt);
56  }
57  }
58  void make_leaf(const TodoMark &todo_info, const IDataProvider &data_provider,
59  Desk *desk) const;
60  inline size_t get_result_columns(const size_t &n_trees,
61  const bool &predict_proba,
62  const bool & /*for_forest*/) const {
63  DLOG(INFO) << "Determining result columns. Summarize: " << summarize
64  << ", predict_proba: " << predict_proba
65  << ", n_trees: " << n_trees;
66  if (annot_dim == 0)
67  throw ForpyException("This leaf manager has not been initialized yet!");
68  if (predict_proba) {
69  if (!store_variance)
70  throw ForpyException(
71  "You called `predict_proba` but didn't enable "
72  "storing the variances. Use `store_variance=True` "
73  "for predictor construction!");
74  if (summarize) {
75  VLOG(23) << "Result columns: " << 2 * annot_dim;
76  return 2 * annot_dim;
77  } else {
78  VLOG(23) << "Result columns: " << n_trees * 2 * annot_dim;
79  return n_trees * 2 * annot_dim;
80  }
81  } else {
82  VLOG(23) << "Result columns: " << annot_dim;
83  return annot_dim;
84  }
85  };
86  inline Data<Mat> get_result_type(const bool & /*predict_proba*/,
87  const bool & /*for_forest*/) const {
88  Data<Mat> ret_mat;
89  ret_mat.set<Mat<float>>();
90  return ret_mat;
91  };
92  void get_result(const id_t &node_id, Data<MatRef> &target,
93  const bool &predict_proba, const bool &for_forest) const;
94  inline const std::vector<Mat<float>> *get_map() const {
95  return &leaf_regression_map;
96  };
97  void get_result(const std::vector<Data<Mat>> &leaf_results,
98  Data<MatRef> &target_v,
99  const Vec<float> &weights = Vec<float>(),
100  const bool &predict_proba = false) const;
101  inline void ensure_capacity(const size_t &n) {
102  leaf_regression_map.resize(n);
103  };
104  inline void finalize_capacity(const size_t &n) { ensure_capacity(n); };
106 
107  bool operator==(const ILeaf &rhs) const;
108  inline friend std::ostream &operator<<(std::ostream &stream,
109  const RegressionLeaf &self) {
110  stream << "forpy::RegressionLeaf[" << self.leaf_regression_map.size()
111  << " stored]";
112  return stream;
113  };
114 
115  private:
116  friend class cereal::access;
117  template <class Archive>
118  void serialize(Archive &ar, const uint &) {
119  ar(cereal::make_nvp("base", cereal::base_class<ILeaf>(this)),
120  CEREAL_NVP(leaf_regression_map), CEREAL_NVP(annot_dim));
121  }
122 
123  std::vector<Mat<float>> leaf_regression_map;
124  size_t annot_dim;
126  bool summarize;
127 };
128 }; // namespace forpy
129 
131 #endif // FORPY_LEAFS_REGRESSIONLEAF_H_
Find an optimal threshold.
Definition: ithreshopt.h:23
virtual bool is_compatible_with(const IDataProvider &)
Checks compatibility with a certain IDataProvider.
Definition: ileaf.h:34
void transfer_or_run_check(ILeaf *other, IThreshOpt *thresh_opt, IDataProvider *dprov)
Interface implementation.
CEREAL_REGISTER_TYPE(forpy::RegressionLeaf)
A data provider for the training of one tree.
Definition: idataprovider.h:22
size_t id_t
Element id type.
Definition: types.h:106
void serialize(Archive &ar, const uint &)
typename mu::variant< Empty, STOT< float >, STOT< double >, STOT< uint >, STOT< uint8_t > > Data
Storing a variant of the provided data container type.
Definition: storage.h:126
void get_result(const id_t &node_id, Data< MatRef > &target, const bool &predict_proba, const bool &for_forest) const
Interface implementation.
std::shared_ptr< ILeaf > create_duplicate() const
Interface implementation.
friend std::ostream & operator<<(std::ostream &stream, const RegressionLeaf &self)
Stores the parameters for one marked tree node.
Definition: types.h:152
size_t get_result_columns(const size_t &n_trees, const bool &predict_proba, const bool &) const
Interface implementation.
size_t get_annot_vec_dim() const
Get the annotation vector dimension.
Definition: idataprovider.h:82
void ensure_capacity(const size_t &n)
Interface implementation.
const std::vector< Mat< float > > * get_map() const
Interface implementation.
Stores and returns leaf values, and combines them to forest results.
Definition: ileaf.h:23
Manages the leaf nodes of regression trees.
RegressionLeaf(const bool &store_variance=false, const bool &summarize=false)
bool is_compatible_with(const IDataProvider &data_provider)
Interface implementation.
Data< Mat > get_result_type(const bool &, const bool &) const
Interface implementation.
void finalize_capacity(const size_t &n)
Interface implementation.
Eigen::Matrix< DT, Eigen::Dynamic, 1, Eigen::ColMajor > Vec
Definition: types.h:73
Eigen::Matrix< DT, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor > Mat
Parameterized Matrix type (row major).
Definition: types.h:52
bool operator==(const ILeaf &rhs) const
Main thread desk object.
Definition: desk.h:201
std::vector< Mat< float > > leaf_regression_map
bool is_compatible_with(const IThreshOpt &)
Interface implementation.
unsigned int uint
Convenience typedef for unsigned int.
Definition: types.h:113
void make_leaf(const TodoMark &todo_info, const IDataProvider &data_provider, Desk *desk) const
Interface implementation.