forpy  2
classification_opt.h
Go to the documentation of this file.
1 /* Author: Christoph Lassner. */
2 #pragma once
3 #ifndef FORPY_THRESHOLD_OPTIMIZERS_CLASSIFICATION_OPT_H_
4 #define FORPY_THRESHOLD_OPTIMIZERS_CLASSIFICATION_OPT_H_
5 
6 #include "../global.h"
7 #include "../util/serialization/basics.h"
8 
9 #include "../impurities/ientropyfunction.h"
10 #include "../impurities/inducedentropy.h"
11 #include "../types.h"
12 #include "../util/desk.h"
13 #include "./ithreshopt.h"
14 
15 namespace forpy {
16 
// The diagnostic pragmas below are clang-specific. They are guarded so that
// other compilers do not emit unknown-pragma warnings for this header.
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-variable"
#endif

/// Variables to control debugging and log output for the
/// forpy::ClassificationOpt.
const int DLOG_COPT_V = 100;
const size_t LOG_COPT_NID = 0;
const bool LOG_COPT_ALLN = false;

/// Classification epsilon. No differences less than this are considered
/// existent.
///
/// NOTE(review): both preprocessor branches initialise a `float`; the
/// FORPY_SKLEARN_COMPAT branch merely spells the literal as a double
/// (`1E-7`) that is converted to float on initialisation.
#ifdef FORPY_SKLEARN_COMPAT
const float CLASSOPT_EPS = 1E-7;
#else
const float CLASSOPT_EPS = 1E-7f;
#endif

#if defined(__clang__)
#pragma clang diagnostic pop
#endif
41 
55 class ClassificationOpt : public IThreshOpt {
56  public:
66  ClassificationOpt(const size_t &n_thresholds = 0,
67  const float &gain_threshold = 1E-7f,
68  const std::shared_ptr<IEntropyFunction> &entropy_function =
69  std::make_shared<InducedEntropy>(2));
70 
72  virtual std::shared_ptr<IThreshOpt> create_duplicate(
74  const uint & /*random_seed*/) const {
75  return std::make_shared<ClassificationOpt>(n_thresholds, gain_threshold,
76  entropy_func);
77  }
78  virtual void check_annotations(IDataProvider *dprov);
79  inline void transfer_or_run_check(IThreshOpt *other, IDataProvider *dprov) {
80  auto *copt_ot = dynamic_cast<ClassificationOpt *>(other);
81  if (copt_ot != nullptr) {
82  copt_ot->n_classes = n_classes;
83  copt_ot->true_max = true_max;
84  copt_ot->class_transl_ptr = class_transl_ptr;
85  } else
86  other->check_annotations(dprov);
87  };
88  virtual void full_entropy(const IDataProvider &dprov, Desk *) const;
89  virtual void optimize(Desk *) const;
90  float get_gain_threshold_for(const size_t & /*node_id*/) {
91  return gain_threshold;
92  };
94 
96  inline size_t get_n_classes() const { return n_classes; };
97 
98  inline std::shared_ptr<std::vector<uint>> get_class_translation() const {
99  return class_transl_ptr;
100  };
101 
102  inline uint get_true_max_class() const { return true_max; };
103 
104  inline friend std::ostream &operator<<(std::ostream &stream,
105  const ClassificationOpt & /*self*/) {
106  stream << "forpy::ClassificationOpt";
107  return stream;
108  };
109  bool operator==(const IThreshOpt &rhs) const;
110 
111  protected:
112  size_t n_thresholds;
113  size_t n_classes;
115  std::shared_ptr<IEntropyFunction> entropy_func;
116  std::shared_ptr<std::vector<uint>> class_transl_ptr;
117  int true_max;
118 
119  private:
120  template <typename IT>
121  inline SplitOptRes<IT> &optimize__setup(DeciderDesk &d) const;
122  template <typename IT>
123  inline void optimize__sort(DeciderDesk &d) const;
124  template <typename IT>
125  inline std::unique_ptr<std::vector<IT>> optimize__thresholds(Desk *d) const;
126  friend class cereal::access;
127  template <class Archive>
128  void serialize(Archive &ar, const uint &) {
129  ar(cereal::make_nvp("base", cereal::base_class<IThreshOpt>(this)),
130  CEREAL_NVP(n_thresholds), CEREAL_NVP(n_classes),
131  CEREAL_NVP(gain_threshold), CEREAL_NVP(entropy_func),
132  CEREAL_NVP(class_transl_ptr), CEREAL_NVP(true_max));
133  }
134 
136 };
137 } // namespace forpy
138 
140 #endif // FORPY_THRESHOLD_OPTIMIZERS_CLASSIFICATION_OPT_H_
Find an optimal threshold.
Definition: ithreshopt.h:23
Desk for decider training.
Definition: desk.h:61
std::shared_ptr< IEntropyFunction > entropy_func
A data provider for the training of one tree.
Definition: idataprovider.h:22
virtual std::shared_ptr< IThreshOpt > create_duplicate(const uint &) const
Interface implementation.
virtual void optimize(Desk *) const
Interface implementation.
const size_t LOG_COPT_NID
Variables to control debugging and log output for the forpy::ClassificationOpt.
float get_gain_threshold_for(const size_t &)
Interface implementation.
friend class cereal::access
virtual void full_entropy(const IDataProvider &dprov, Desk *) const
Interface implementation.
ClassificationOpt(const size_t &n_thresholds=0, const float &gain_threshold=1E-7f, const std::shared_ptr< IEntropyFunction > &entropy_function=std::make_shared< InducedEntropy >(2))
const bool LOG_COPT_ALLN
Variables to control debugging and log output for the forpy::ClassificationOpt.
CEREAL_REGISTER_TYPE(forpy::ClassificationOpt)
friend std::ostream & operator<<(std::ostream &stream, const ClassificationOpt &)
std::shared_ptr< std::vector< uint > > class_transl_ptr
void serialize(Archive &ar, const uint &)
std::shared_ptr< std::vector< uint > > get_class_translation() const
const int DLOG_COPT_V
Variables to control debugging and log output for the forpy::ClassificationOpt.
Optimize split thresholds to optimize classification results.
void transfer_or_run_check(IThreshOpt *other, IDataProvider *dprov)
Interface implementation.
SplitOptRes< IT > & optimize__setup(DeciderDesk &d) const
virtual void check_annotations(IDataProvider *dprov) VIRTUAL_VOID
Validate annotations for usability with this optimizer.
std::unique_ptr< std::vector< IT > > optimize__thresholds(Desk *d) const
Main thread desk object.
Definition: desk.h:201
DISALLOW_COPY_AND_ASSIGN(ClassificationOpt)
unsigned int uint
Convenience typedef for unsigned int.
Definition: types.h:113
const float CLASSOPT_EPS
Classification epsilon. No differences less than this are considered existent. This is relevant for: ...
bool operator==(const IThreshOpt &rhs) const
virtual void check_annotations(IDataProvider *dprov)
Interface implementation.
size_t get_n_classes() const
Get the determined number of classes.
void optimize__sort(DeciderDesk &d) const