Stan Math Library 2.15.0
reverse mode automatic differentiation
log_softmax.hpp
Go to the documentation of this file.
1 #ifndef STAN_MATH_REV_MAT_FUN_LOG_SOFTMAX_HPP
2 #define STAN_MATH_REV_MAT_FUN_LOG_SOFTMAX_HPP
3 
8 #include <stan/math/rev/core.hpp>
9 #include <cmath>
10 #include <vector>
11 
12 namespace stan {
13  namespace math {
14 
15  namespace {
16 
17  class log_softmax_elt_vari : public vari {
18  private:
19  vari** alpha_;
20  const double* softmax_alpha_;
21  const int size_; // array sizes
22  const int idx_; // in in softmax output
23 
24  public:
25  log_softmax_elt_vari(double val,
26  vari** alpha,
27  const double* softmax_alpha,
28  int size,
29  int idx)
30  : vari(val),
31  alpha_(alpha),
32  softmax_alpha_(softmax_alpha),
33  size_(size),
34  idx_(idx) {
35  }
36  void chain() {
37  for (int m = 0; m < size_; ++m) {
38  if (m == idx_)
39  alpha_[m]->adj_ += adj_ * (1 - softmax_alpha_[m]);
40  else
41  alpha_[m]->adj_ -= adj_ * softmax_alpha_[m];
42  }
43  }
44  };
45 
46  }
47 
58  inline Eigen::Matrix<var, Eigen::Dynamic, 1>
59  log_softmax(const Eigen::Matrix<var, Eigen::Dynamic, 1>& alpha) {
60  using Eigen::Matrix;
61  using Eigen::Dynamic;
62 
63  check_nonzero_size("log_softmax", "alpha", alpha);
64 
65  // TODO(carpenter): replace with array alloc
66  vari** alpha_vi_array
67  = reinterpret_cast<vari**>
68  (vari::operator new(sizeof(vari*) * alpha.size()));
69  for (int i = 0; i < alpha.size(); ++i)
70  alpha_vi_array[i] = alpha(i).vi_;
71 
72  Matrix<double, Dynamic, 1> alpha_d(alpha.size());
73  for (int i = 0; i < alpha_d.size(); ++i)
74  alpha_d(i) = alpha(i).val();
75 
76  // fold logic of math::softmax() and math::log_softmax()
77  // to save computations
78 
79  Matrix<double, Dynamic, 1> softmax_alpha_d(alpha_d.size());
80  Matrix<double, Dynamic, 1> log_softmax_alpha_d(alpha_d.size());
81 
82  double max_v = alpha_d.maxCoeff();
83 
84  double sum = 0.0;
85  for (int i = 0; i < alpha_d.size(); ++i) {
86  softmax_alpha_d(i) = std::exp(alpha_d(i) - max_v);
87  sum += softmax_alpha_d(i);
88  }
89 
90  for (int i = 0; i < alpha_d.size(); ++i)
91  softmax_alpha_d(i) /= sum;
92  double log_sum = std::log(sum);
93 
94  for (int i = 0; i < alpha_d.size(); ++i)
95  log_softmax_alpha_d(i) = (alpha_d(i) - max_v) - log_sum;
96 
97  // end fold
98  // TODO(carpenter): replace with array alloc
99  double* softmax_alpha_d_array
100  = reinterpret_cast<double*>
101  (vari::operator new(sizeof(double) * alpha_d.size()));
102 
103  for (int i = 0; i < alpha_d.size(); ++i)
104  softmax_alpha_d_array[i] = softmax_alpha_d(i);
105 
106  Matrix<var, Dynamic, 1> log_softmax_alpha(alpha.size());
107  for (int k = 0; k < log_softmax_alpha.size(); ++k)
108  log_softmax_alpha(k)
109  = var(new log_softmax_elt_vari(log_softmax_alpha_d[k],
110  alpha_vi_array,
111  softmax_alpha_d_array,
112  alpha.size(),
113  k));
114  return log_softmax_alpha;
115  }
116 
117  }
118 }
119 #endif
fvar< T > sum(const std::vector< fvar< T > > &m)
Return the sum of the entries of the specified standard vector.
Definition: sum.hpp:20
void check_nonzero_size(const char *function, const char *name, const T_y &y)
Check if the specified matrix/vector is of non-zero size.
fvar< T > log(const fvar< T > &x)
Definition: log.hpp:14
The variable implementation base class.
Definition: vari.hpp:30
Independent (input) and dependent (output) variables for gradients.
Definition: var.hpp:30
Eigen::Matrix< fvar< T >, Eigen::Dynamic, 1 > log_softmax(const Eigen::Matrix< fvar< T >, Eigen::Dynamic, 1 > &alpha)
Definition: log_softmax.hpp:16
fvar< T > exp(const fvar< T > &x)
Definition: exp.hpp:10
size_t size_
Definition: dot_self.hpp:18
int size(const std::vector< T > &x)
Return the size of the specified standard vector.
Definition: size.hpp:17

     [ Stan Home Page ] © 2011–2016, Stan Development Team.