forpy  2
fastdecider.h
Go to the documentation of this file.
1 /* Author: Christoph Lassner. */
2 #pragma once
3 #ifndef FORPY_DECIDERS_FASTDECIDER_H_
4 #define FORPY_DECIDERS_FASTDECIDER_H_
5 
6 #include "../global.h"
7 
8 #include "../util/serialization/basics.h"
9 
10 #include <limits>
11 #include <typeinfo>
12 #include <utility>
13 #include <vector>
14 
15 #include "../types.h"
16 #include "../util/desk.h"
17 #include "../util/storage.h"
18 #include "./idecider.h"
19 
20 namespace forpy {
21 
// Namespace-level constants for this header. The clang pragma pair below
// disables the -Wunused-variable diagnostic for exactly these declarations
// and restores the previous diagnostic state afterwards.
22 #pragma clang diagnostic push
23 #pragma clang diagnostic ignored "-Wunused-variable"
// NOTE(review): presumably a verbosity level for debug logging -- the use
// sites are not visible in this header; confirm.
24 const int DLOG_FD_V = 100;
// NOTE(review): presumably the id of a single node selected for logging --
// confirm against the implementation file.
25 const size_t LOG_FD_NID = 12043;
// NOTE(review): presumably switches logging on for all nodes -- confirm.
26 const bool LOG_FD_ALLN = true;
27 #pragma clang diagnostic pop
28 
// FastDecider: an IDecider implementation. It holds a threshold optimizer
// (IThreshOpt) plus two per-node containers, `node_to_featsel` and
// `node_to_thresh_v`, and the expected feature dimension `data_dim`.
//
// NOTE(review): this listing omits several numbered lines (46->64, 71->74->76,
// 83->85, 86->88, 89->91, 155->157, 166->169, 171->173). Bodies and
// declarations that cross those gaps are incomplete in this view and must be
// read in the original header.
45 class FastDecider : public IDecider {
46  public:
    // Constructor. All three arguments are defaulted (no optimizer, 0 valid
    // features, autoscaling off); the definition is not part of this header.
64  FastDecider(const std::shared_ptr<IThreshOpt> &threshold_optimizer = nullptr,
65  const size_t &n_valid_features_to_use = 0,
66  const bool &autoscale_valid_features = false);
67 
    // Builds a new FastDecider via std::make_shared, handing it a duplicate of
    // the threshold optimizer created with `random_seed`.
    // NOTE(review): listing lines 72-73 and 75 are missing, so the remaining
    // constructor arguments are not visible here.
68  virtual std::shared_ptr<IDecider> create_duplicate(
69  const uint &random_seed) const {
70  return std::make_shared<FastDecider>(
71  threshold_optimizer->create_duplicate(random_seed),
74  : 0,
76  }
77 
    // Checks this decider against a data provider.
    //  - Logs a warning when `n_valids_to_use` exceeds the provider's feature
    //    vector dimension (the message announces a reduction; the statement
    //    performing it is on a line missing from this listing).
    //  - When `n_valids_to_use` is 0, the visible assignment sets it to the
    //    rounded square root of the feature vector dimension.
    //  - Throws ForpyException if the provider's feature vector dimension
    //    differs from `data_dim`; otherwise returns true.
    // NOTE(review): listing lines 84, 87 and 90 are missing, so the exact
    // branch structure cannot be confirmed from this view.
78  inline bool is_compatible_with(const IDataProvider &dprov) {
79  if (n_valids_to_use > dprov.get_feat_vec_dim()) {
80  LOG(WARNING) << "`n_valid_features_to_use` is greater than the number of "
81  << "features (" << n_valids_to_use << ">"
82  << dprov.get_feat_vec_dim() << ")! I'm reducing "
83  << "the number accordingly.";
85  }
86  if (n_valids_to_use == 0) {
88  n_valids_to_use = std::round(std::sqrt(dprov.get_feat_vec_dim()));
89  } else {
91  }
92  }
93 
94  if (dprov.get_feat_vec_dim() != data_dim)
95  throw ForpyException("Incompatible data provider detected!");
96  return true;
97  }
98 
    // Propagates this decider's settings to `other`:
    //  1. forwards to the threshold optimizer's transfer_or_run_check with
    //     `other`'s threshold optimizer and `dprov`,
    //  2. copies `data_dim` into `other`,
    //  3. if `other` is not a FastDecider, runs its compatibility check
    //     against `*dprov`; if it is one, copies `n_valids_to_use` instead.
    // `other` and `dprov` are dereferenced without a null check.
99  inline void transfer_or_run_check(const std::shared_ptr<IDecider> &other,
100  IDataProvider *dprov) {
101  threshold_optimizer->transfer_or_run_check(other->get_threshopt().get(),
102  dprov);
103  other->set_data_dim(data_dim);
104  auto *ot_fd = dynamic_cast<FastDecider *>(other.get());
105  if (ot_fd == nullptr)
106  other->is_compatible_with(*dprov);
107  else
108  ot_fd->n_valids_to_use = n_valids_to_use;
109  }
110 
    // Stores `val` as the expected feature dimension.
111  inline void set_data_dim(const size_t &val) { data_dim = val; };
112 
    // Resizes `node_to_featsel` and whichever vector alternative is currently
    // held by `node_to_thresh_v` to `n_samples` elements.
113  inline void ensure_capacity(const size_t &n_samples) {
114  node_to_featsel.resize(n_samples);
115  node_to_thresh_v.match([&n_samples](auto &vec) { vec.resize(n_samples); });
116  };
117 
    // Forwards to ensure_capacity(size), i.e. resizes both containers to `size`.
118  inline void finalize_capacity(const size_t &size) { ensure_capacity(size); };
119 
    // Per the generated documentation: optimizes a classifier for the given
    // data and stores the params. Defined outside this header.
120  void make_node(const TodoMark &todo_info, const uint &min_samples_at_leaf,
121  const IDataProvider &data_provider, Desk *d) const;
122 
    // Per the generated documentation: makes a decision for a node with
    // already optimized parameters. Defined outside this header.
123  bool decide(
124  const id_t &node_id, const Data<MatCRef> &data_v,
125  const std::function<void(void *)> &decision_param_transf = nullptr) const;
126 
    // Per the generated documentation: whether this classifier manager
    // supports sample weights during training.
127  bool supports_weights() const;
128 
    // NOTE(review): presumably returns `data_dim`; the definition is not
    // visible here.
129  size_t get_data_dim() const;
130 
    // NOTE(review): presumably returns `threshold_optimizer`; the definition
    // is not visible here.
131  std::shared_ptr<IThreshOpt> get_threshopt() const;
132 
    // Equality against any IDecider; defined outside this header.
133  bool operator==(const IDecider &rhs) const;
134 
    // Streams "forpy::FastDecider[<N> stored]", where N is the current size of
    // `node_to_featsel`.
135  inline friend std::ostream &operator<<(std::ostream &stream,
136  const FastDecider &self) {
137  stream << "forpy::FastDecider[" << self.node_to_featsel.size()
138  << " stored]";
139  return stream;
140  };
    // Returns a pair of const pointers: one to a std::vector<size_t> and one
    // to a variant of threshold vectors.
    // NOTE(review): presumably these point at `node_to_featsel` and
    // `node_to_thresh_v`; confirm in the implementation.
141  std::pair<const std::vector<size_t> *,
142  const mu::variant<std::vector<float>, std::vector<double>,
143  std::vector<uint32_t>, std::vector<uint8_t>> *>
144  get_maps() const;
145 
146  private:
    // cereal serialization hook: archives the IDecider base followed by the
    // named fields below. The version argument is unused.
147  friend class cereal::access;
148  template <class Archive>
149  void serialize(Archive &ar, const uint &) {
150  ar(cereal::make_nvp("base", cereal::base_class<IDecider>(this)),
151  CEREAL_NVP(threshold_optimizer), CEREAL_NVP(n_valids_to_use),
152  CEREAL_NVP(autoscale_valid_features), CEREAL_NVP(node_to_featsel),
153  CEREAL_NVP(node_to_thresh_v), CEREAL_NVP(data_dim));
154  }
155 
    // Private helpers, declared here and defined elsewhere.
    // NOTE(review): the names suggest they are the stages of make_node
    // (checks, optimization, postprocessing); confirm in the implementation.
157  void _make_node__checks(const TodoMark &todo_info,
158  const IDataProvider &data_provider,
159  const uint &min_samples_at_leaf, Desk *d) const;
160 
161  void _make_node__opt(const IDataProvider &dprov, Desk *d) const;
162 
163  void _make_node__postprocess(const IDataProvider &dprov, Desk *d) const;
164 
165  // Fields.
    // NOTE(review): listing lines 167-168 and 172 are missing. The generated
    // cross references place `bool autoscale_valid_features` at line 168 and
    // the declarator name `node_to_thresh_v` at line 172; `n_valids_to_use`
    // is used above, but its declaration is not visible in this listing.
166  std::shared_ptr<IThreshOpt> threshold_optimizer;
169  std::vector<size_t> node_to_featsel;
170  mu::variant<std::vector<float>, std::vector<double>, std::vector<uint32_t>,
171  std::vector<uint8_t>>
173  size_t data_dim;
174 };
175 }; // namespace forpy
176 
178 #endif // FORPY_DECIDERS_FASTDECIDER_H_
void make_node(const TodoMark &todo_info, const uint &min_samples_at_leaf, const IDataProvider &data_provider, Desk *d) const
Optimizes a classifier for the given data and stores the params.
void _make_node__postprocess(const IDataProvider &dprov, Desk *d) const
A classifier manager for weak classifiers with a filter function, a feature calculation function and ...
Definition: fastdecider.h:45
const size_t LOG_FD_NID
Definition: fastdecider.h:25
bool is_compatible_with(const IDataProvider &dprov)
Definition: fastdecider.h:78
A data provider for the training of one tree.
Definition: idataprovider.h:22
const int DLOG_FD_V
Definition: fastdecider.h:24
size_t id_t
Element id type.
Definition: types.h:106
CEREAL_REGISTER_TYPE(forpy::FastDecider)
bool supports_weights() const
Whether this classifier manager supports sample weights during training.
typename mu::variant< Empty, STOT< float >, STOT< double >, STOT< uint >, STOT< uint8_t > > Data
Storing a variant of the provided data container type.
Definition: storage.h:126
mu::variant< std::vector< float >, std::vector< double >, std::vector< uint32_t >, std::vector< uint8_t > > node_to_thresh_v
Definition: fastdecider.h:172
size_t get_data_dim() const
void serialize(Archive &ar, const uint &)
Definition: fastdecider.h:149
void _make_node__checks(const TodoMark &todo_info, const IDataProvider &data_provider, const uint &min_samples_at_leaf, Desk *d) const
bool autoscale_valid_features
Definition: fastdecider.h:168
void finalize_capacity(const size_t &size)
Definition: fastdecider.h:118
Stores the parameters for one marked tree node.
Definition: types.h:152
void ensure_capacity(const size_t &n_samples)
Definition: fastdecider.h:113
friend class cereal::access
Definition: fastdecider.h:147
FastDecider(const std::shared_ptr< IThreshOpt > &threshold_optimizer=nullptr, const size_t &n_valid_features_to_use=0, const bool &autoscale_valid_features=false)
bool operator==(const IDecider &rhs) const
std::shared_ptr< IThreshOpt > get_threshopt() const
const bool LOG_FD_ALLN
Definition: fastdecider.h:26
size_t get_feat_vec_dim() const
Get the feature vector dimension.
Definition: idataprovider.h:77
bool decide(const id_t &node_id, const Data< MatCRef > &data_v, const std::function< void(void *)> &decision_param_transf=nullptr) const
Makes a decision for a node with already optimized parameters.
std::pair< const std::vector< size_t > *, const mu::variant< std::vector< float >, std::vector< double >, std::vector< uint32_t >, std::vector< uint8_t > > * > get_maps() const
std::vector< size_t > node_to_featsel
Definition: fastdecider.h:169
void transfer_or_run_check(const std::shared_ptr< IDecider > &other, IDataProvider *dprov)
Definition: fastdecider.h:99
virtual std::shared_ptr< IDecider > create_duplicate(const uint &random_seed) const
Definition: fastdecider.h:68
Main thread desk object.
Definition: desk.h:201
friend std::ostream & operator<<(std::ostream &stream, const FastDecider &self)
Definition: fastdecider.h:135
Interface for the decider. It does the optimization of the deciding classifier for each node and stor...
Definition: idecider.h:31
unsigned int uint
Convenience typedef for unsigned int.
Definition: types.h:113
std::shared_ptr< IThreshOpt > threshold_optimizer
Definition: fastdecider.h:166
void _make_node__opt(const IDataProvider &dprov, Desk *d) const
void set_data_dim(const size_t &val)
Definition: fastdecider.h:111