forpy  2
fastdprov.h
Go to the documentation of this file.
1 /* Author: Christoph Lassner. */
2 #pragma once
3 #ifndef FORPY_DATA_PROVIDERS_FASTDPROV_H_
4 #define FORPY_DATA_PROVIDERS_FASTDPROV_H_
5 
6 #include <functional>
7 #include <vector>
8 #include "../global.h"
9 #include "../types.h"
10 #include "../util/checks.h"
11 #include "./idataprovider.h"
12 
13 namespace forpy {
// Data provider that uses the provided data plain throughout the training.
// Derives from IDataProvider; members marked "IDataProvider function
// implementation" below implement that interface.
// NOTE(review): this listing skips numbered lines, so several declarations
// (the constructors in particular) are only partially visible here.
19 class FastDProv : public IDataProvider {
20  public:
 // NOTE(review): tail of a declaration whose beginning is not visible in this
 // listing (presumably a public constructor - confirm against the real
 // header). Its last parameter is the shared, read-only weight vector.
33  const std::shared_ptr<std::vector<float> const> &weights_store);
34 
 // NOTE(review): tail of a second declaration with the same trailing
 // weights_store parameter; its beginning is likewise not visible here.
49  const std::shared_ptr<std::vector<float> const> &weights_store);
50 
 // IDataProvider function implementation. Returns a mutable reference to the
 // id vector held by training_ids (the shared pointer is dereferenced).
52  inline std::vector<id_t> &get_initial_sample_list() { return *training_ids; };
54 
 // IDataProvider function implementation. Returns the number of entries in
 // the vector held by training_ids.
55  inline size_t get_n_samples() const { return training_ids->size(); };
56 
 // IDataProvider function implementation; only declared here. The parameter
 // name is commented out in the declaration.
57  Data<VecCMap> get_feature(const size_t & /*feat_idx*/) const;
58 
 // IDataProvider function implementation. Returns the annotations member by
 // value.
59  inline Data<MatCRef> get_annotations() const { return annotations; };
60 
 // IDataProvider function implementation. Replaces the stored annotations:
 // new_annotation_store is copied into annotation_store, and annotations is
 // then reassigned from the object the stored pointer refers to.
61  inline void set_annotations(const DataStore<Mat> &new_annotation_store) {
62  annotation_store = new_annotation_store;
 // The lambda passed to match() dereferences the held pointer and assigns
 // the result to annotations.
63  annotation_store.match(
64  [&](const auto &new_annotations) { annotations = *new_annotations; });
65  };
66 
 // IDataProvider function implementation. Returns the weights_store shared
 // pointer.
67  inline std::shared_ptr<const std::vector<float>> get_weights() const {
68  return weights_store;
69  }
70 
 // IDataProvider function implementation; only declared here. Takes the
 // usage map (usage_map_t) by non-const reference and returns a vector of
 // shared IDataProvider pointers.
71  std::vector<std::shared_ptr<IDataProvider>> create_tree_providers(
72  usage_map_t &usage_map);
74 
 // Stream output. Writes
 //   forpy::FastDProv[<n samples> samples, <feat dim> -> <annot dim>]
 // and returns the stream. get_feat_vec_dim() and get_annot_vec_dim() are not
 // declared in this listing (presumably inherited from IDataProvider -
 // confirm).
75  inline friend std::ostream &operator<<(std::ostream &stream,
76  const FastDProv &self) {
77  stream << "forpy::FastDProv[" << self.get_n_samples() << " samples, "
78  << self.get_feat_vec_dim() << " -> " << self.get_annot_vec_dim()
79  << "]";
80  return stream;
81  };
 // Equality comparison against any IDataProvider; only declared here.
82  bool operator==(const IDataProvider &rhs) const;
83 
84  private:
 // Constructor for deserialization; has an empty body.
86  inline FastDProv(){};
87 
 // NOTE(review): tail of a private declaration whose beginning is not
 // visible in this listing. The visible parameters are the shared weight
 // vector and a non-const reference to the shared id vector.
94  const std::shared_ptr<std::vector<float> const> &weights_store,
95  std::shared_ptr<std::vector<id_t>> &training_ids);
96 
 // Perform all necessary checks before constructing an instance.
98  void checks(const Data<MatCRef> &data,
99  const Data<MatCRef> &annotations) const;
100 
 // Perform the initialization once FastDProv::data and
 // FastDProv::annotations have been set.
103  void init_from_arrays();
104 
 // NOTE(review): the members data_store, annotation_store, data and
 // annotations used above are declared on lines not visible in this listing.
 // Weight storage.
117  std::shared_ptr<std::vector<float> const> weights_store;
 // Shared vector of sample ids; read by get_initial_sample_list() and
 // get_n_samples().
120  std::shared_ptr<std::vector<id_t>> training_ids;
121 };
122 } // namespace forpy
123 #endif // FORPY_DATA_PROVIDERS_FASTDPROV_H_
size_t feat_vec_dim
The dimension of one feature vector.
void checks(const Data< MatCRef > &data, const Data< MatCRef > &annotations) const
Perform all necessary checks before constructing an instance.
friend std::ostream & operator<<(std::ostream &stream, const FastDProv &self)
Definition: fastdprov.h:75
std::vector< std::shared_ptr< IDataProvider > > create_tree_providers(usage_map_t &usage_map)
forpy::IDataProvider function implementation.
A data provider for the training of one tree.
Definition: idataprovider.h:22
void init_from_arrays()
Perform the initialization once the FastDProv::data and FastDProv::annotations have been set...
Data< MatCRef > data
A reference to the data.
Definition: fastdprov.h:113
Data< MatCRef > annotations
A reference to the annotations.
Definition: fastdprov.h:115
std::vector< std::pair< std::shared_ptr< std::vector< size_t > >, std::shared_ptr< std::vector< float > const > > > usage_map_t
Describes how each sample is used for each tree.
Definition: types.h:192
typename mu::variant< Empty, STOT< float >, STOT< double >, STOT< uint >, STOT< uint8_t > > Data
Storing a variant of the provided data container type.
Definition: storage.h:126
Data< VecCMap > get_feature(const size_t &) const
forpy::IDataProvider function implementation.
std::shared_ptr< std::vector< float > const > weights_store
Weight storage.
Definition: fastdprov.h:117
typename mu::variant< std::shared_ptr< const STOT< float > >, std::shared_ptr< const STOT< double > >, std::shared_ptr< const STOT< uint > >, std::shared_ptr< const STOT< uint8_t > >> DataStore
Variant for storing shared_ptrs to the stored data matrix type.
Definition: storage.h:119
DataStore< Mat > annotation_store
Definition: fastdprov.h:111
std::vector< id_t > & get_initial_sample_list()
forpy::IDataProvider function implementation.
Definition: fastdprov.h:53
std::shared_ptr< const std::vector< float > > get_weights() const
forpy::IDataProvider function implementation.
Definition: fastdprov.h:67
Use the provided data plain throughout the training.
Definition: fastdprov.h:19
DataStore< Mat > data_store
Data storage. If ownership of the data can't be shared, it is copied here.
Definition: fastdprov.h:108
bool operator==(const IDataProvider &rhs) const
FastDProv()
Constructor for deserialization.
Definition: fastdprov.h:86
size_t annot_vec_dim
The dimension of one annotation vector.
std::shared_ptr< std::vector< id_t > > training_ids
Definition: fastdprov.h:120
size_t get_n_samples() const
forpy::IDataProvider function implementation.
Definition: fastdprov.h:55
Data< MatCRef > get_annotations() const
forpy::IDataProvider function implementation.
Definition: fastdprov.h:59
void set_annotations(const DataStore< Mat > &new_annotation_store)
forpy::IDataProvider function implementation.
Definition: fastdprov.h:61