Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/gam/generalized_additive_model.py : 14%

# -*- coding: utf-8 -*-
"""
Generalized Additive Models

Author: Luca Puggini
Author: Josef Perktold

created on 08/07/2015
"""

from collections.abc import Iterable
import copy  # check if needed when dropping python 2.7

import numpy as np
from scipy import optimize
import pandas as pd

import statsmodels.base.wrapper as wrap

from statsmodels.discrete.discrete_model import Logit
from statsmodels.genmod.generalized_linear_model import (
    GLM, GLMResults, GLMResultsWrapper, _check_convergence)
import statsmodels.regression.linear_model as lm
# import statsmodels.regression._tools as reg_tools  # TODO: use this for pirls
from statsmodels.tools.sm_exceptions import (PerfectSeparationError,
                                             ValueWarning)
from statsmodels.tools.decorators import cache_readonly
from statsmodels.tools.data import _is_using_pandas
from statsmodels.tools.linalg import matrix_sqrt

from statsmodels.base._penalized import PenalizedMixin
from statsmodels.gam.gam_penalties import MultivariateGamPenalty
from statsmodels.gam.gam_cross_validation.gam_cross_validation import (
    MultivariateGAMCVPath)
from statsmodels.gam.gam_cross_validation.cross_validators import KFold


def _transform_predict_exog(model, exog, design_info=None):
    """transform exog for predict using design_info

    Note: this is copied from base.model.Results.predict and converted to
    standalone function with additional options.
    """

    is_pandas = _is_using_pandas(exog, None)

    exog_index = exog.index if is_pandas else None

    if design_info is None:
        design_info = getattr(model.data, 'design_info', None)

    if design_info is not None and (exog is not None):
        from patsy import dmatrix
        if isinstance(exog, pd.Series):
            # we are guessing whether it should be column or row
            if (hasattr(exog, 'name') and isinstance(exog.name, str) and
                    exog.name in design_info.describe()):
                # assume we need one column
                exog = pd.DataFrame(exog)
            else:
                # assume we need a row
                exog = pd.DataFrame(exog).T
        orig_exog_len = len(exog)
        is_dict = isinstance(exog, dict)
        exog = dmatrix(design_info, exog, return_type="dataframe")
        if orig_exog_len > len(exog) and not is_dict:
            import warnings
            if exog_index is None:
                warnings.warn('nan values have been dropped', ValueWarning)
            else:
                exog = exog.reindex(exog_index)
            exog_index = exog.index

    if exog is not None:
        exog = np.asarray(exog)
        if exog.ndim == 1 and (model.exog.ndim == 1 or
                               model.exog.shape[1] == 1):
            exog = exog[:, None]
        exog = np.atleast_2d(exog)  # needed in count model shape[1]

    return exog, exog_index
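
# Illustrative sketch (not part of this module's API): how a stored patsy
# ``design_info`` is re-applied to new data for prediction, which is what
# ``_transform_predict_exog`` does internally. The names ``df`` and
# ``df_new`` below are hypothetical.
#
# >>> import pandas as pd
# >>> from patsy import dmatrix
# >>> df = pd.DataFrame({'x': [1., 2., 3., 4.]})
# >>> design = dmatrix('x + I(x**2)', df, return_type='dataframe')
# >>> df_new = pd.DataFrame({'x': [5., 6.]})
# >>> # reuse the original design information for the new observations
# >>> exog_new = dmatrix(design.design_info, df_new, return_type='dataframe')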


class GLMGamResults(GLMResults):
    """Results class for generalized additive models, GAM.

    This inherits from GLMResults.

    Warning: some inherited methods might not correctly take account of the
    penalization.

    GLMGamResults inherits from GLMResults.
    All methods related to the loglikelihood function return the penalized
    values.

    Attributes
    ----------
    edf
        list of effective degrees of freedom for each column of the design
        matrix.
    hat_matrix_diag
        diagonal of hat matrix
    gcv
        generalized cross-validation criterion computed as
        ``gcv = scale / (1. - hat_matrix_trace / self.nobs)**2``
    cv
        cross-validation criterion computed as
        ``cv = ((resid_pearson / (1 - hat_matrix_diag))**2).sum() / nobs``

    Notes
    -----
    status: experimental
    """

    def __init__(self, model, params, normalized_cov_params, scale, **kwds):

        # this is a messy way to compute edf and update scale
        # need several attributes to compute edf
        self.model = model
        self.params = params
        self.normalized_cov_params = normalized_cov_params
        self.scale = scale
        edf = self.edf.sum()
        self.df_model = edf - 1  # assume constant
        # need to use nobs or wnobs attribute
        self.df_resid = self.model.endog.shape[0] - edf

        # we are setting the model df for the case when super is using it
        # df in model will be in an incorrect state when alpha/pen_weight
        # changes
        self.model.df_model = self.df_model
        self.model.df_resid = self.df_resid
        mu = self.fittedvalues
        self.scale = scale = self.model.estimate_scale(mu)
        super(GLMGamResults, self).__init__(model, params,
                                            normalized_cov_params, scale,
                                            **kwds)

    def _tranform_predict_exog(self, exog=None, exog_smooth=None,
                               transform=True):
        """Transform original explanatory variables for prediction

        Parameters
        ----------
        exog : array_like, optional
            The values for the linear explanatory variables.
        exog_smooth : array_like
            values for the variables in the smooth terms
        transform : bool, optional
            If transform is False, then ``exog`` is returned unchanged and
            ``exog_smooth`` is ignored. It is assumed that exog contains the
            full design matrix for the predict observations.
            If transform is True, then the basis representation of the smooth
            term will be constructed from the provided ``exog_smooth``.

        Returns
        -------
        exog_transformed : ndarray
            design matrix for the prediction
        """
        exog_index = None
        if transform is False:
            # the following allows that either or both exog are not None
            if exog_smooth is None:
                # exog could be None or array
                ex = exog
            else:
                if exog is None:
                    ex = exog_smooth
                else:
                    ex = np.column_stack((exog, exog_smooth))
        else:
            # transform exog_linear if needed
            if exog is not None and hasattr(self.model, 'design_info_linear'):
                exog, exog_index = _transform_predict_exog(
                    self.model, exog, self.model.design_info_linear)

            # create smooth basis
            if exog_smooth is not None:
                ex_smooth = self.model.smoother.transform(exog_smooth)
                if exog is None:
                    ex = ex_smooth
                else:
                    # TODO: there might be problems if exog_smooth is 1-D
                    ex = np.column_stack((exog, ex_smooth))
            else:
                ex = exog

        return ex, exog_index

    def predict(self, exog=None, exog_smooth=None, transform=True, **kwargs):
        """
        compute prediction

        Parameters
        ----------
        exog : array_like, optional
            The values for the linear explanatory variables
        exog_smooth : array_like
            values for the variables in the smooth terms
        transform : bool, optional
            If transform is True, then the basis representation of the smooth
            term will be constructed from the provided ``exog_smooth``.
        kwargs :
            Some models can take additional arguments or keywords, see the
            predict method of the model for the details.

        Returns
        -------
        prediction : ndarray, pandas.Series or pandas.DataFrame
            predicted values
        """
        ex, exog_index = self._tranform_predict_exog(exog=exog,
                                                     exog_smooth=exog_smooth,
                                                     transform=transform)
        predict_results = super(GLMGamResults, self).predict(ex,
                                                             transform=False,
                                                             **kwargs)
        if exog_index is not None and not hasattr(
                predict_results, 'predicted_values'):
            if predict_results.ndim == 1:
                return pd.Series(predict_results, index=exog_index)
            else:
                return pd.DataFrame(predict_results, index=exog_index)
        else:
            return predict_results
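
    # Illustrative usage sketch (hypothetical names; assumes a fitted GLMGam
    # results instance ``res`` for a model without a linear exog part, whose
    # smoother was built from a one-column array ``x``):
    #
    # >>> import numpy as np
    # >>> x_new = np.linspace(x.min(), x.max(), 50)[:, None]
    # >>> pred = res.predict(exog_smooth=x_new)  # basis is rebuilt internally
    # >>> pred_same = res.predict(res.model.exog, transform=False)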

    def get_prediction(self, exog=None, exog_smooth=None, transform=True,
                       **kwargs):
        """compute prediction results

        Parameters
        ----------
        exog : array_like, optional
            The values for which you want to predict.
        exog_smooth : array_like
            values for the variables in the smooth terms
        transform : bool, optional
            If transform is True, then the basis representation of the smooth
            term will be constructed from the provided ``exog_smooth``.
        kwargs :
            Some models can take additional arguments or keywords, see the
            predict method of the model for the details.

        Returns
        -------
        prediction_results : generalized_linear_model.PredictionResults
            The prediction results instance contains prediction and prediction
            variance and can on demand calculate confidence intervals and
            summary tables for the prediction of the mean and of new
            observations.
        """
        ex, exog_index = self._tranform_predict_exog(exog=exog,
                                                     exog_smooth=exog_smooth,
                                                     transform=transform)
        return super(GLMGamResults, self).get_prediction(ex, transform=False,
                                                         **kwargs)

    def partial_values(self, smooth_index, include_constant=True):
        """contribution of a smooth term to the linear prediction

        Warning: This will be replaced by a predict method

        Parameters
        ----------
        smooth_index : int
            index of the smooth term within list of smooth terms
        include_constant : bool
            If true, then the estimated intercept is added to the prediction
            and its standard errors. This avoids a confidence interval with
            zero width at the imposed identification constraint, e.g.
            either at a reference point or at the mean.

        Returns
        -------
        predicted : ndarray
            predicted value of linear term.
            This is not the expected response if the link function is not
            linear.
        se_pred : ndarray
            standard error of linear prediction
        """
        variable = smooth_index
        smoother = self.model.smoother
        mask = smoother.mask[variable]

        start_idx = self.model.k_exog_linear
        idx = start_idx + np.nonzero(mask)[0]

        # smoother has only smooth parts, not exog_linear
        exog_part = smoother.basis[:, mask]

        const_idx = self.model.data.const_idx
        if include_constant and const_idx is not None:
            idx = np.concatenate(([const_idx], idx))
            exog_part = self.model.exog[:, idx]

        linpred = np.dot(exog_part, self.params[idx])
        # select the submatrix corresponding to a single variable
        partial_cov_params = self.cov_params(column=idx)

        covb = partial_cov_params
        var = (exog_part * np.dot(covb, exog_part.T).T).sum(1)
        se = np.sqrt(var)

        return linpred, se

    def plot_partial(self, smooth_index, plot_se=True, cpr=False,
                     include_constant=True, ax=None):
        """plot the contribution of a smooth term to the linear prediction

        Parameters
        ----------
        smooth_index : int
            index of the smooth term within list of smooth terms
        plot_se : bool
            If plot_se is true, then the confidence interval for the linear
            prediction will be added to the plot.
        cpr : bool
            If cpr (component plus residual) is true, then a scatter plot of
            the partial working residuals will be added to the plot.
        include_constant : bool
            If true, then the estimated intercept is added to the prediction
            and its standard errors. This avoids a confidence interval with
            zero width at the imposed identification constraint, e.g.
            either at a reference point or at the mean.
        ax : None or matplotlib axis instance
            If ax is not None, then the plot will be added to it.

        Returns
        -------
        Figure
            If `ax` is None, the created figure. Otherwise the Figure to which
            `ax` is connected.
        """
        from statsmodels.graphics.utils import _import_mpl, create_mpl_ax
        _import_mpl()

        variable = smooth_index
        y_est, se = self.partial_values(variable,
                                        include_constant=include_constant)
        smoother = self.model.smoother
        x = smoother.smoothers[variable].x
        sort_index = np.argsort(x)
        x = x[sort_index]
        y_est = y_est[sort_index]
        se = se[sort_index]

        fig, ax = create_mpl_ax(ax)
        ax.plot(x, y_est, c='blue', lw=2)
        if plot_se:
            ax.plot(x, y_est + 1.96 * se, '-', c='blue')
            ax.plot(x, y_est - 1.96 * se, '-', c='blue')
        if cpr:
            # TODO: resid_response does not make sense with nonlinear link
            # use resid_working ?
            cpr_ = y_est + self.resid_working
            ax.plot(x, cpr_, '.', lw=2)

        ax.set_xlabel(smoother.smoothers[variable].variable_name)

        return fig
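
    # Illustrative usage sketch (assumes a fitted results instance ``res``;
    # matplotlib must be installed for the plotting call):
    #
    # >>> lin, se = res.partial_values(0)       # linear contribution + s.e.
    # >>> fig = res.plot_partial(0, cpr=True)   # partial effect + CPR scatter
    # >>> fig.savefig('smooth_term_0.png')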

    def test_significance(self, smooth_index):
        """hypothesis test that a smooth component is zero.

        This calls `wald_test` to compute the hypothesis test, but uses
        effective degrees of freedom.

        Parameters
        ----------
        smooth_index : int
            index of the smooth term within list of smooth terms

        Returns
        -------
        wald_test : ContrastResults instance
            the results instance created by `wald_test`
        """

        variable = smooth_index
        smoother = self.model.smoother
        start_idx = self.model.k_exog_linear

        k_params = len(self.params)
        # a bit messy, we need first index plus length of smooth term
        mask = smoother.mask[variable]
        k_constraints = mask.sum()
        idx = start_idx + np.nonzero(mask)[0][0]
        constraints = np.eye(k_constraints, k_params, idx)
        df_constraints = self.edf[idx: idx + k_constraints].sum()
        return self.wald_test(constraints, df_constraints=df_constraints)

    def get_hat_matrix_diag(self, observed=True, _axis=1):
        """
        Compute the diagonal of the hat matrix

        Parameters
        ----------
        observed : bool
            If true, then observed hessian is used in the hat matrix
            computation. If false, then the expected hessian is used.
            In the case of a canonical link function both are the same.
            This is only relevant for models that implement both observed
            and expected Hessian, which is currently only GLM. Other
            models only use the observed Hessian.
        _axis : int
            This is mainly for internal use. By default it returns the usual
            diagonal of the hat matrix. If _axis is zero, then the result
            corresponds to the effective degrees of freedom, ``edf`` for each
            column of exog.

        Returns
        -------
        hat_matrix_diag : ndarray
            The diagonal of the hat matrix computed from the observed
            or expected hessian.
        """
        weights = self.model.hessian_factor(self.params, scale=self.scale,
                                            observed=observed)
        wexog = np.sqrt(weights)[:, None] * self.model.exog

        # we can use inverse hessian directly instead of computing it from
        # WLS/IRLS as in GLM

        # TODO: does `normalized_cov_params * scale` work in all cases?
        # this avoids recomputing hessian, check when used for other models.
        hess_inv = self.normalized_cov_params * self.scale
        # this is in GLM equivalent to the more generic and direct
        # hess_inv = np.linalg.inv(-self.model.hessian(self.params))
        hd = (wexog * hess_inv.dot(wexog.T).T).sum(axis=_axis)
        return hd

    @cache_readonly
    def edf(self):
        return self.get_hat_matrix_diag(_axis=0)

    @cache_readonly
    def hat_matrix_trace(self):
        return self.hat_matrix_diag.sum()

    @cache_readonly
    def hat_matrix_diag(self):
        return self.get_hat_matrix_diag(observed=True)

    @cache_readonly
    def gcv(self):
        return self.scale / (1. - self.hat_matrix_trace / self.nobs)**2

    @cache_readonly
    def cv(self):
        cv_ = ((self.resid_pearson / (1. - self.hat_matrix_diag))**2).sum()
        cv_ /= self.nobs
        return cv_
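
    # Illustrative sketch of the model-selection criteria above (assumes a
    # fitted results instance ``res``; this only reproduces the formulas from
    # the class docstring, it adds no new functionality):
    #
    # >>> h = res.hat_matrix_diag                      # leverage per observation
    # >>> gcv = res.scale / (1. - h.sum() / res.nobs) ** 2
    # >>> cv = ((res.resid_pearson / (1. - h)) ** 2).sum() / res.nobs
    # >>> res.edf.sum()                                # total effective df of the fit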


class GLMGamResultsWrapper(GLMResultsWrapper):
    pass

wrap.populate_wrapper(GLMGamResultsWrapper, GLMGamResults)


class GLMGam(PenalizedMixin, GLM):
    """
    Generalized Additive Models (GAM)

    This inherits from `GLM`.

    Warning: Not all inherited methods might correctly take account of the
    penalization. Not all options including offset and exposure have been
    verified yet.

    Parameters
    ----------
    endog : array_like
        The response variable.
    exog : array_like or None
        These explanatory variables are treated as linear. The model in this
        case is a partial linear model.
    smoother : instance of additive smoother class
        Examples of smoother instances include Bsplines or CyclicCubicSplines.
    alpha : float or list of floats
        Penalization weights for smooth terms. The length of the list needs
        to be the same as the number of smooth terms in the ``smoother``.
    family : instance of GLM family
        See GLM.
    offset : None or array_like
        See GLM.
    exposure : None or array_like
        See GLM.
    missing : 'none'
        Missing value handling is not supported in this class.
    **kwargs
        Extra keywords are used in call to the super classes.

    Notes
    -----
    Status: experimental. This has full unit test coverage for the core
    results with Gaussian and Poisson (without offset and exposure). Other
    options and additional results might not be correctly supported yet.
    (Binomial with counts, i.e. with n_trials, is most likely wrong in pirls.
    User specified var or freq weights are most likely also not correct for
    all results.)
    """

    _results_class = GLMGamResults
    _results_class_wrapper = GLMGamResultsWrapper

    def __init__(self, endog, exog=None, smoother=None, alpha=0, family=None,
                 offset=None, exposure=None, missing='none', **kwargs):

        # TODO: check usage of hasconst
        hasconst = kwargs.get('hasconst', None)
        xnames_linear = None
        if hasattr(exog, 'design_info'):
            self.design_info_linear = exog.design_info
            xnames_linear = self.design_info_linear.column_names

        is_pandas = _is_using_pandas(exog, None)

        # TODO: data handling is experimental, see #5469
        # This is a bit wasteful because we need to call `handle_data` twice
        self.data_linear = self._handle_data(endog, exog, missing, hasconst)
        if xnames_linear is None:
            xnames_linear = self.data_linear.xnames
        if exog is not None:
            exog_linear = self.data_linear.exog
            k_exog_linear = exog_linear.shape[1]
        else:
            exog_linear = None
            k_exog_linear = 0
        self.k_exog_linear = k_exog_linear
        # We need exog_linear for k-fold cross validation
        # TODO: alternative is to take columns from combined exog
        self.exog_linear = exog_linear

        self.smoother = smoother
        self.k_smooths = smoother.k_variables
        self.alpha = self._check_alpha(alpha)
        penal = MultivariateGamPenalty(smoother, alpha=self.alpha,
                                       start_idx=k_exog_linear)
        kwargs.pop('penal', None)
        if exog_linear is not None:
            exog = np.column_stack((exog_linear, smoother.basis))
        else:
            exog = smoother.basis

        # TODO: check: xnames_linear will be None instead of empty list
        # if no exog_linear
        # can smoother be empty? I guess that is not allowed.
        if xnames_linear is None:
            xnames_linear = []
        xnames = xnames_linear + self.smoother.col_names

        if is_pandas and exog_linear is not None:
            # we use a DataFrame so we can get a PandasData instance for
            # wrapping
            exog = pd.DataFrame(exog, index=self.data_linear.row_labels,
                                columns=xnames)

        super(GLMGam, self).__init__(endog, exog=exog, family=family,
                                     offset=offset, exposure=exposure,
                                     penal=penal, missing=missing, **kwargs)

        if not is_pandas:
            # set exog names if not given by pandas DataFrame
            self.exog_names[:] = xnames

        # TODO: the generic data handling might attach the design_info from
        # the linear part, but this is incorrect for the full model and
        # causes problems in wald_test_terms
        if hasattr(self.data, 'design_info'):
            del self.data.design_info
        # formula also might be attached which causes problems in predict
        if hasattr(self, 'formula'):
            self.formula_linear = self.formula
            self.formula = None
            del self.formula

    def _check_alpha(self, alpha):
        """check and convert alpha to required list format

        Parameters
        ----------
        alpha : scalar, list or array_like
            penalization weight

        Returns
        -------
        alpha : list
            penalization weight, list with length equal to the number of
            smooth terms
        """
        if not isinstance(alpha, Iterable):
            alpha = [alpha] * len(self.smoother.smoothers)
        elif not isinstance(alpha, list):
            # we want alpha to be a list
            alpha = list(alpha)
        return alpha
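
    # Illustrative construction sketch (hypothetical data; assumes the
    # ``BSplines`` smoother exposed in ``statsmodels.gam.api``):
    #
    # >>> import numpy as np
    # >>> from statsmodels.gam.api import GLMGam, BSplines
    # >>> rng = np.random.default_rng(0)
    # >>> x = rng.uniform(0, 1, size=(200, 2))
    # >>> y = np.sin(2 * np.pi * x[:, 0]) + x[:, 1] + rng.normal(size=200)
    # >>> bs = BSplines(x, df=[10, 10], degree=[3, 3])
    # >>> gam = GLMGam(y, smoother=bs, alpha=1.0)  # scalar alpha is expanded
    # >>> gam.alpha                                # [1.0, 1.0] via _check_alpha
    # >>> res = gam.fit()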

    def fit(self, start_params=None, maxiter=1000, method='pirls', tol=1e-8,
            scale=None, cov_type='nonrobust', cov_kwds=None, use_t=None,
            full_output=True, disp=False, max_start_irls=3, **kwargs):
        """estimate parameters and create instance of GLMGamResults class

        Parameters
        ----------
        most parameters are the same as for GLM
        method : str
            Optimization method. The special method is "pirls" which uses a
            penalized version of IRLS. Other methods are gradient optimizers
            as used in base.model.LikelihoodModel.

        Returns
        -------
        res : instance of wrapped GLMGamResults
        """
        # TODO: temporary hack to remove attribute
        # formula might also be attached, which is inherited from
        # from_formula and causes problems in predict
        if hasattr(self, 'formula'):
            self.formula_linear = self.formula
            del self.formula

        # TODO: alpha not allowed yet, but is in `_fit_pirls`
        # alpha = self._check_alpha()

        if method.lower() in ['pirls', 'irls']:
            res = self._fit_pirls(self.alpha, start_params=start_params,
                                  maxiter=maxiter, tol=tol, scale=scale,
                                  cov_type=cov_type, cov_kwds=cov_kwds,
                                  use_t=use_t, **kwargs)
        else:
            if max_start_irls > 0 and (start_params is None):
                res = self._fit_pirls(self.alpha, start_params=start_params,
                                      maxiter=max_start_irls, tol=tol,
                                      scale=scale,
                                      cov_type=cov_type, cov_kwds=cov_kwds,
                                      use_t=use_t, **kwargs)
                start_params = res.params
                del res
            res = super(GLMGam, self).fit(start_params=start_params,
                                          maxiter=maxiter, method=method,
                                          tol=tol, scale=scale,
                                          cov_type=cov_type,
                                          cov_kwds=cov_kwds, use_t=use_t,
                                          full_output=full_output, disp=disp,
                                          max_start_irls=0,
                                          **kwargs)
        return res
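
    # Illustrative usage sketch (assumes ``gam`` constructed as in the
    # example after ``_check_alpha`` above):
    #
    # >>> res_pirls = gam.fit()              # default penalized IRLS
    # >>> res_bfgs = gam.fit(method='bfgs')  # gradient optimizer, warm-started
    # ...                                    # by a few PIRLS steps
    # ...                                    # (max_start_irls)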

    # pag 165 4.3 # pag 136 PIRLS
    def _fit_pirls(self, alpha, start_params=None, maxiter=100, tol=1e-8,
                   scale=None, cov_type='nonrobust', cov_kwds=None,
                   use_t=None, weights=None):
        """fit model with penalized reweighted least squares
        """
        # TODO: this currently modifies several attributes
        # self.scale, self.scaletype, self.mu, self.weights
        # self.data_weights,
        # and possibly self._offset_exposure
        # several of those might not be necessary, e.g. mu and weights

        # alpha = alpha * len(y) * self.scale / 100
        # TODO: we need to rescale alpha
        endog = self.endog
        wlsexog = self.exog  # smoother.basis
        spl_s = self.penal.penalty_matrix(alpha=alpha)

        nobs, n_columns = wlsexog.shape

        # TODO what are these values?
        if weights is None:
            self.data_weights = np.array([1.] * nobs)
        else:
            self.data_weights = weights

        if not hasattr(self, '_offset_exposure'):
            self._offset_exposure = 0

        self.scaletype = scale
        # TODO: check default scale types
        # self.scaletype = 'dev'
        # during iteration
        self.scale = 1

        if start_params is None:
            mu = self.family.starting_mu(endog)
            lin_pred = self.family.predict(mu)
        else:
            lin_pred = np.dot(wlsexog, start_params) + self._offset_exposure
            mu = self.family.fitted(lin_pred)
        dev = self.family.deviance(endog, mu)

        history = dict(params=[None, start_params], deviance=[np.inf, dev])
        converged = False
        criterion = history['deviance']
        # This special case is used to get the likelihood for a specific
        # params vector.
        if maxiter == 0:
            mu = self.family.fitted(lin_pred)
            self.scale = self.estimate_scale(mu)
            wls_results = lm.RegressionResults(self, start_params, None)
            iteration = 0

        for iteration in range(maxiter):

            # TODO: is this equivalent to point 1 of page 136:
            # w = 1 / (V(mu) * g'(mu)) ?
            self.weights = self.data_weights * self.family.weights(mu)

            # TODO: is this equivalent to point 1 of page 136:
            # z = g(mu)(y - mu) + X beta ?
            wlsendog = (lin_pred + self.family.link.deriv(mu) * (endog - mu)
                        - self._offset_exposure)

            # this defines the augmented matrix point 2a on page 136
            wls_results = penalized_wls(wlsendog, wlsexog, spl_s, self.weights)
            lin_pred = np.dot(wlsexog, wls_results.params).ravel()
            lin_pred += self._offset_exposure
            mu = self.family.fitted(lin_pred)

            # We do not need to update scale in GLM/LEF models
            # We might need it in dispersion models.
            # self.scale = self.estimate_scale(mu)
            history = self._update_history(wls_results, mu, history)

            if endog.squeeze().ndim == 1 and np.allclose(mu - endog, 0):
                msg = "Perfect separation detected, results not available"
                raise PerfectSeparationError(msg)

            # TODO need atol, rtol
            # args of _check_convergence: (criterion, iteration, atol, rtol)
            converged = _check_convergence(criterion, iteration, tol, 0)
            if converged:
                break
        self.mu = mu
        self.scale = self.estimate_scale(mu)
        glm_results = GLMGamResults(self, wls_results.params,
                                    wls_results.normalized_cov_params,
                                    self.scale,
                                    cov_type=cov_type, cov_kwds=cov_kwds,
                                    use_t=use_t)

        glm_results.method = "PIRLS"
        history['iteration'] = iteration + 1
        glm_results.fit_history = history
        glm_results.converged = converged

        return GLMGamResultsWrapper(glm_results)
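
    # Minimal sketch of what one PIRLS step solves (hypothetical small
    # arrays; Gaussian family with identity link, so the working weights are
    # 1 and the working response equals ``y``): the augmented WLS in
    # ``penalized_wls`` is equivalent to the penalized normal equations
    # (X'WX + 2*S) b = X'Wy.
    #
    # >>> import numpy as np
    # >>> rng = np.random.default_rng(0)
    # >>> X = rng.normal(size=(50, 4))   # stand-in for the spline basis
    # >>> y = rng.normal(size=50)
    # >>> S = np.eye(4)                  # stand-in penalty matrix
    # >>> b = np.linalg.solve(X.T @ X + 2 * S, X.T @ y)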

    def select_penweight(self, criterion='aic', start_params=None,
                         start_model_params=None,
                         method='basinhopping', **fit_kwds):
        """find alpha by minimizing results criterion

        The objective for the minimization can be results attributes like
        ``gcv``, ``aic`` or ``bic`` where the latter are based on effective
        degrees of freedom.

        Warning: In many cases the optimization might converge to a local
        optimum or near optimum. Different start_params or using a global
        optimizer is recommended, default is basinhopping.

        Parameters
        ----------
        criterion='aic'
            name of results attribute to be minimized.
            Default is 'aic', other options are 'gcv', 'cv' or 'bic'.
        start_params : None or array
            starting values for alpha in the penalization weight
            minimization. Internally the minimization is with respect to
            ``log(alpha)``, so the provided start_params are log-transformed.
        start_model_params : None or array
            starting parameter for the ``model._fit_pirls``.
        method : 'basinhopping', 'nm' or 'minimize'
            'basinhopping' and 'nm' directly use the underlying scipy.optimize
            functions `basinhopping` and `fmin`. 'minimize' provides access
            to the high level interface, `scipy.optimize.minimize`.
        fit_kwds : keyword arguments
            additional keyword arguments will be used in the call to the
            scipy optimizer. Which keywords are supported depends on the
            scipy optimization function.

        Returns
        -------
        alpha : ndarray
            penalization parameter found by minimizing the criterion.
            Note that this can be only a local (near) optimum.
        fit_res : tuple
            results returned by the scipy optimization routine. The
            parameters in the optimization problem are `log(alpha)`
        history : dict
            history of calls to pirls; contains alpha, the fit criterion and
            the parameters to which pirls converged for the given alpha.

        Notes
        -----
        In the test cases Nelder-Mead and bfgs often converge to local optima,
        see also https://github.com/statsmodels/statsmodels/issues/5381.

        This does not use any analytical derivatives for the criterion
        minimization.

        Status: experimental. It is possible that defaults will change if
        there is a better way to find a global optimum. API (e.g. type of
        return) might also change.
        """
        # copy attributes that are changed, so we can reset them
        scale_keep = self.scale
        scaletype_keep = self.scaletype
        # TODO: use .copy() method when available for all types
        alpha_keep = copy.copy(self.alpha)

        if start_params is None:
            start_params = np.zeros(self.k_smooths)
        else:
            start_params = np.log(1e-20 + start_params)

        history = {}
        history['alpha'] = []
        history['params'] = [start_model_params]
        history['criterion'] = []

        def fun(p):
            a = np.exp(p)
            res_ = self._fit_pirls(start_params=history['params'][-1],
                                   alpha=a)
            history['alpha'].append(a)
            history['params'].append(np.asarray(res_.params))
            return getattr(res_, criterion)

        if method == 'nm':
            kwds = dict(full_output=True, maxiter=1000, maxfun=2000)
            kwds.update(fit_kwds)
            fit_res = optimize.fmin(fun, start_params, **kwds)
            opt = fit_res[0]
        elif method == 'basinhopping':
            kwds = dict(minimizer_kwargs={'method': 'Nelder-Mead',
                                          'options': {'maxiter': 100,
                                                      'maxfev': 500}},
                        niter=10)
            kwds.update(fit_kwds)
            fit_res = optimize.basinhopping(fun, start_params, **kwds)
            opt = fit_res.x
        elif method == 'minimize':
            fit_res = optimize.minimize(fun, start_params, **fit_kwds)
            opt = fit_res.x
        else:
            raise ValueError('method not recognized')

        del history['params'][0]  # remove the model start_params

        alpha = np.exp(opt)

        # reset attributes that have or might have changed
        self.scale = scale_keep
        self.scaletype = scaletype_keep
        self.alpha = alpha_keep

        return alpha, fit_res, history
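
    # Illustrative usage sketch (assumes ``gam`` constructed as in the
    # earlier example; basinhopping can be slow, so 'nm' is shown here):
    #
    # >>> alpha_opt, opt_res, hist = gam.select_penweight(criterion='gcv',
    # ...                                                 method='nm')
    # >>> res = gam._fit_pirls(alpha=alpha_opt)  # refit at the selected alpha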

    def select_penweight_kfold(self, alphas=None, cv_iterator=None, cost=None,
                               k_folds=5, k_grid=11):
        """find alphas by k-fold cross-validation

        Warning: This estimates ``k_folds`` models for each point in the
        grid of alphas.

        Parameters
        ----------
        alphas : None or list of arrays
            grid of penalization weights, one array per smooth term. If None,
            the default grid described in the Notes section is used.
        cv_iterator : instance
            instance of a cross-validation iterator, by default this is a
            KFold instance
        cost : function
            default is mean squared error. The cost function to evaluate the
            prediction error for the left out sample. This should take two
            arrays as argument and return one float.
        k_folds : int
            number of folds if the default KFold iterator is used.
            This is ignored if ``cv_iterator`` is not None.
        k_grid : int
            number of points in the default alpha grid.
            This is ignored if ``alphas`` is not None.

        Returns
        -------
        alpha_cv : list of float
            Best alpha in grid according to cross-validation
        res_cv : instance of MultivariateGAMCVPath
            The instance was used for cross-validation and holds the results

        Notes
        -----
        The default alphas are defined as
        ``alphas = [np.logspace(0, 7, k_grid) for _ in range(k_smooths)]``
        """

        if cost is None:
            def cost(x1, x2):
                return np.linalg.norm(x1 - x2) / len(x1)

        if alphas is None:
            alphas = [np.logspace(0, 7, k_grid) for _ in range(self.k_smooths)]

        if cv_iterator is None:
            cv_iterator = KFold(k_folds=k_folds, shuffle=True)

        gam_cv = MultivariateGAMCVPath(smoother=self.smoother, alphas=alphas,
                                       gam=GLMGam, cost=cost,
                                       endog=self.endog,
                                       exog=self.exog_linear,
                                       cv_iterator=cv_iterator)
        gam_cv_res = gam_cv.fit()

        return gam_cv_res.alpha_cv, gam_cv_res
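
    # Illustrative usage sketch (assumes ``gam``, ``y`` and ``bs`` as in the
    # earlier construction example; a coarse grid keeps the number of refits
    # small):
    #
    # >>> alpha_cv, cv_res = gam.select_penweight_kfold(k_folds=3, k_grid=5)
    # >>> res_cv = GLMGam(y, smoother=bs, alpha=alpha_cv).fit()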


class LogitGam(PenalizedMixin, Logit):
    """Generalized Additive model for discrete Logit

    This subclasses discrete_model Logit.

    Warning: not all inherited methods might correctly take account of the
    penalization.

    not verified yet.
    """

    def __init__(self, endog, smoother, alpha, *args, **kwargs):
        if not isinstance(alpha, Iterable):
            alpha = np.array([alpha] * len(smoother.smoothers))

        self.smoother = smoother
        self.alpha = alpha
        self.pen_weight = 1  # TODO: pen weight should not be defined here!!

        penal = MultivariateGamPenalty(smoother, alpha=alpha)

        super(LogitGam, self).__init__(endog, smoother.basis, penal=penal,
                                       *args, **kwargs)


def penalized_wls(endog, exog, penalty_matrix, weights):
    """weighted least squares with quadratic penalty

    Parameters
    ----------
    endog : ndarray
        response or endogenous variable
    exog : ndarray
        design matrix, matrix of exogenous or explanatory variables
    penalty_matrix : ndarray, 2-Dim square
        penalty matrix for quadratic penalization. Note, the penalty_matrix
        is multiplied by two to match non-pirls fitting methods.
    weights : ndarray
        weights for WLS

    Returns
    -------
    results : Results instance of WLS
    """
    y, x, s = endog, exog, penalty_matrix
    # TODO: I do not understand why I need 2 * s
    aug_y, aug_x, aug_weights = make_augmented_matrix(y, x, 2 * s, weights)
    wls_results = lm.WLS(aug_y, aug_x, aug_weights).fit()
    # TODO: use MinimalWLS during iterations, less overhead
    # However, MinimalWLS does not return normalized_cov_params
    # which we need at the end of the iterations
    # call would be
    # wls_results = reg_tools._MinimalWLS(aug_y, aug_x, aug_weights).fit()
    wls_results.params = wls_results.params.ravel()

    return wls_results


def make_augmented_matrix(endog, exog, penalty_matrix, weights):
    """augment endog, exog and weights with stochastic restriction matrix

    Parameters
    ----------
    endog : ndarray
        response or endogenous variable
    exog : ndarray
        design matrix, matrix of exogenous or explanatory variables
    penalty_matrix : ndarray, 2-Dim square
        penalty matrix for quadratic penalization
    weights : ndarray
        weights for WLS

    Returns
    -------
    endog_aug : ndarray
        augmented response variable
    exog_aug : ndarray
        augmented design matrix
    weights_aug : ndarray
        augmented weights for WLS
    """
    y, x, s = endog, exog, penalty_matrix
    nobs = x.shape[0]

    # TODO: needs full because of broadcasting with weights
    # check what weights should be doing
    rs = matrix_sqrt(s)
    x1 = np.vstack([x, rs])  # augmented x
    n_samples_x1 = x1.shape[0]

    y1 = np.array([0.] * n_samples_x1)  # augmented y
    y1[:nobs] = y

    id1 = np.array([1.] * rs.shape[0])
    w1 = np.concatenate([weights, id1])

    return y1, x1, w1
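

# A small self-contained sketch (not part of the statsmodels API): it checks
# numerically that WLS on the augmented data returned by
# ``make_augmented_matrix`` solves the penalized normal equations
# (X'WX + S) b = X'Wy, which is what ``penalized_wls`` relies on (there S is
# replaced by 2 * penalty_matrix).
if __name__ == "__main__":
    rng = np.random.RandomState(12345)
    nobs_demo, k_demo = 50, 4
    x_demo = rng.normal(size=(nobs_demo, k_demo))
    y_demo = rng.normal(size=nobs_demo)
    w_demo = np.ones(nobs_demo)
    s_demo = np.eye(k_demo)  # stand-in penalty matrix

    y_aug, x_aug, w_aug = make_augmented_matrix(y_demo, x_demo, s_demo,
                                                w_demo)
    params_aug = lm.WLS(y_aug, x_aug, weights=w_aug).fit().params

    # direct solution of the penalized normal equations
    params_direct = np.linalg.solve(
        x_demo.T @ (w_demo[:, None] * x_demo) + s_demo,
        x_demo.T @ (w_demo * y_demo))
    assert np.allclose(params_aug, params_direct)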