# -*- coding: utf-8 -*-
"""
Penalty classes for Generalized Additive Models

Author: Luca Puggini
Author: Josef Perktold

"""

import numpy as np
from scipy.linalg import block_diag
from statsmodels.base._penalties import Penalty


class UnivariateGamPenalty(Penalty):
    """
    Penalty for a smooth term in Generalized Additive Models

    Parameters
    ----------
    univariate_smoother : instance
        instance of univariate smoother or spline class
    alpha : float
        default penalty weight; ``alpha`` can also be provided to each method
    weights : array_like
        TODO: not used or verified, might be removed

    Attributes
    ----------
    Parameters are stored, additionally
    nobs : The number of samples used during estimation
    n_columns : number of columns in the smoother basis
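
    Examples
    --------
    A minimal sketch with a stand-in smoother (``DummySmoother`` here is
    hypothetical, not part of statsmodels; any object exposing ``nobs``,
    ``dim_basis`` and ``cov_der2`` should work the same way):

    >>> import numpy as np
    >>> class DummySmoother:
    ...     nobs = 100
    ...     dim_basis = 3
    ...     cov_der2 = np.eye(3)
    >>> pen = UnivariateGamPenalty(DummySmoother(), alpha=2.)
    >>> float(pen.func(np.ones(3)))
    0.06
    >>> pen.deriv2(np.ones(3)).shape
    (3, 3)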

33 """ 

34 

    def __init__(self, univariate_smoother, alpha=1, weights=1):
        self.weights = weights
        self.alpha = alpha
        self.univariate_smoother = univariate_smoother
        self.nobs = self.univariate_smoother.nobs
        self.n_columns = self.univariate_smoother.dim_basis

    def func(self, params, alpha=None):
        """evaluate penalization at params

        Parameters
        ----------
        params : ndarray
            coefficients for the spline basis in the regression model
        alpha : float
            penalty weight; ``self.alpha`` is used if None

        Returns
        -------
        func : float
            value of the penalty evaluated at params
        """
        if alpha is None:
            alpha = self.alpha

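        # quadratic form params' S params, scaled by alpha / nobs; S is the
        # smoother's ``cov_der2`` penalty matrix (built, as the name
        # suggests, from second derivatives of the basis functions)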
        f = params.dot(self.univariate_smoother.cov_der2.dot(params))
        return alpha * f / self.nobs

    def deriv(self, params, alpha=None):
        """evaluate derivative of penalty with respect to params

        Parameters
        ----------
        params : ndarray
            coefficients for the spline basis in the regression model
        alpha : float
            penalty weight; ``self.alpha`` is used if None

        Returns
        -------
        deriv : ndarray
            derivative, gradient of the penalty with respect to params
        """
        if alpha is None:
            alpha = self.alpha

        d = 2 * alpha * np.dot(self.univariate_smoother.cov_der2, params)
        d /= self.nobs
        return d

    def deriv2(self, params, alpha=None):
        """evaluate second derivative of penalty with respect to params

        Parameters
        ----------
        params : ndarray
            coefficients for the spline basis in the regression model
        alpha : float
            penalty weight; ``self.alpha`` is used if None

        Returns
        -------
        deriv2 : ndarray, 2-D
            second derivative, hessian of the penalty with respect to params
        """
        if alpha is None:
            alpha = self.alpha

        d2 = 2 * alpha * self.univariate_smoother.cov_der2
        d2 /= self.nobs
        return d2

    def penalty_matrix(self, alpha=None):
        """penalty matrix for the smooth term of a GAM

        Parameters
        ----------
        alpha : float or None
            penalty weight; ``self.alpha`` is used if None

        Returns
        -------
        penalty matrix
            square penalty matrix for quadratic penalization. The number
            of rows and columns is equal to the number of columns in the
            smooth term, i.e. the number of parameters for this smooth
            term in the regression model
        """
        if alpha is None:
            alpha = self.alpha

        return alpha * self.univariate_smoother.cov_der2


class MultivariateGamPenalty(Penalty):
    """
    Penalty for Generalized Additive Models

    Parameters
    ----------
    multivariate_smoother : instance
        instance of additive smoother or spline class
    alpha : list of float
        default penalty weights, list with length equal to the number of
        smooth terms. ``alpha`` can also be provided to each method.
    weights : array_like
        currently not used; would be either a list of floats of the same
        length as ``alpha`` or a list of ndarrays where each component has
        length equal to the number of columns of the corresponding smooth
        term
    start_idx : int
        number of parameters that come before the smooth terms. If the model
        has a linear component, then the parameters for the smooth components
        start at ``start_idx``.

    Attributes
    ----------
    Parameters are stored, additionally
    nobs : The number of samples used during estimation
    dim_basis : number of columns of the additive smoother, i.e. the total
        number of columns in all smoothers
    k_variables : number of smooth terms
    k_params : total number of parameters in the regression model
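
    Examples
    --------
    A minimal sketch with stand-in smoothers (the dummy classes below are
    hypothetical; in practice an additive smoother such as ``BSplines`` from
    ``statsmodels.gam.smooth_basis`` would be passed):

    >>> import numpy as np
    >>> class DummySmoother:
    ...     nobs = 100
    ...     dim_basis = 3
    ...     cov_der2 = np.eye(3)
    >>> class DummyAdditiveSmoother:
    ...     smoothers = [DummySmoother(), DummySmoother()]
    ...     dim_basis = 6
    ...     k_variables = 2
    ...     nobs = 100
    >>> gp = MultivariateGamPenalty(DummyAdditiveSmoother(), alpha=[1., 2.],
    ...                             start_idx=1)
    >>> gp.k_params
    7
    >>> gp.penalty_matrix().shape
    (7, 7)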

159 """ 

160 

161 def __init__(self, multivariate_smoother, alpha, weights=None, 

162 start_idx=0): 

163 

164 if len(multivariate_smoother.smoothers) != len(alpha): 

165 msg = ('all the input values should be of the same length.' 

166 ' len(smoothers)=%d, len(alphas)=%d') % ( 

167 len(multivariate_smoother.smoothers), len(alpha)) 

168 raise ValueError(msg) 

169 

170 self.multivariate_smoother = multivariate_smoother 

171 self.dim_basis = self.multivariate_smoother.dim_basis 

172 self.k_variables = self.multivariate_smoother.k_variables 

173 self.nobs = self.multivariate_smoother.nobs 

174 self.alpha = alpha 

175 self.start_idx = start_idx 

176 self.k_params = start_idx + self.dim_basis 

177 

178 # TODO: Review this, 

179 if weights is None: 

180 # weights should have total length as params 

181 # but it can also be scalar in individual component 

182 self.weights = [1. for _ in range(self.k_variables)] 

183 else: 

184 import warnings 

185 warnings.warn('weights is currently ignored') 

186 self.weights = weights 

187 

188 self.mask = [np.zeros(self.k_params, dtype=np.bool_) 

189 for _ in range(self.k_variables)] 

190 param_count = start_idx 

191 for i, smoother in enumerate(self.multivariate_smoother.smoothers): 

192 # the mask[i] contains a vector of length k_columns. The index 

193 # corresponding to the i-th input variable are set to True. 

194 self.mask[i][param_count: param_count + smoother.dim_basis] = True 

195 param_count += smoother.dim_basis 

196 

197 self.gp = [] 

198 for i in range(self.k_variables): 

199 gp = UnivariateGamPenalty(self.multivariate_smoother.smoothers[i], 

200 weights=self.weights[i], 

201 alpha=self.alpha[i]) 

202 self.gp.append(gp) 

203 

204 def func(self, params, alpha=None): 

205 """evaluate penalization at params 

206 

207 Parameters 

208 ---------- 

209 params : ndarray 

210 coefficients in the regression model 

211 alpha : float or list of floats 

212 penalty weights 

213 

214 Returns 

215 ------- 

216 func : float 

217 value of the penalty evaluated at params 

218 """ 

219 if alpha is None: 

220 alpha = [None] * self.k_variables 

221 

222 cost = 0 

223 for i in range(self.k_variables): 

224 params_i = params[self.mask[i]] 

225 cost += self.gp[i].func(params_i, alpha=alpha[i]) 

226 

227 return cost 

228 

229 def deriv(self, params, alpha=None): 

230 """evaluate derivative of penalty with respect to params 

231 

232 Parameters 

233 ---------- 

234 params : ndarray 

235 coefficients in the regression model 

236 alpha : list of floats or None 

237 penalty weights 

238 

239 Returns 

240 ------- 

241 deriv : ndarray 

242 derivative, gradient of the penalty with respect to params 

243 """ 

244 if alpha is None: 

245 alpha = [None] * self.k_variables 

246 
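        # the first start_idx entries are zero: parameters of the linear
        # (non-smooth) part of the model are not penalized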
        grad = [np.zeros(self.start_idx)]
        for i in range(self.k_variables):
            params_i = params[self.mask[i]]
            grad.append(self.gp[i].deriv(params_i, alpha=alpha[i]))

        return np.concatenate(grad)

    def deriv2(self, params, alpha=None):
        """evaluate second derivative of penalty with respect to params

        Parameters
        ----------
        params : ndarray
            coefficients in the regression model
        alpha : list of floats or None
            penalty weights; ``self.alpha`` is used if None

        Returns
        -------
        deriv2 : ndarray, 2-D
            second derivative, hessian of the penalty with respect to params
        """
        if alpha is None:
            alpha = [None] * self.k_variables

        deriv2 = [np.zeros((self.start_idx, self.start_idx))]
        for i in range(self.k_variables):
            params_i = params[self.mask[i]]
            deriv2.append(self.gp[i].deriv2(params_i, alpha=alpha[i]))

        return block_diag(*deriv2)

    def penalty_matrix(self, alpha=None):
        """penalty matrix for a generalized additive model

        Parameters
        ----------
        alpha : list of floats or None
            penalty weights; ``self.alpha`` is used if None

        Returns
        -------
        penalty matrix
            block diagonal, square penalty matrix for quadratic penalization.
            The number of rows and columns is equal to the number of
            parameters in the regression model ``k_params``.

        Notes
        -----
        statsmodels does not support backwards compatibility when keywords
        are used as positional arguments; the order of keywords might change.
        We might need to add a ``params`` keyword if the need arises.
        """
        if alpha is None:
            alpha = self.alpha

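        # assemble one penalty block per smooth term into a block diagonal
        # matrix; the leading zero block covers the unpenalized linear part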
        s_all = [np.zeros((self.start_idx, self.start_idx))]
        for i in range(self.k_variables):
            s_all.append(self.gp[i].penalty_matrix(alpha=alpha[i]))

        return block_diag(*s_all)