"""
Generalized linear models currently support estimation using the one-parameter
exponential families.

References
----------
Gill, Jeff. 2000. Generalized Linear Models: A Unified Approach.
    SAGE QASS Series.

Green, PJ. 1984. "Iteratively reweighted least squares for maximum
    likelihood estimation, and some robust and resistant alternatives."
    Journal of the Royal Statistical Society, Series B, 46, 149-192.

Hardin, J.W. and Hilbe, J.M. 2007. "Generalized Linear Models and
    Extensions." 2nd ed. Stata Press, College Station, TX.

McCullagh, P. and Nelder, J.A. 1989. "Generalized Linear Models." 2nd ed.
    Chapman & Hall, Boca Raton.
"""
import numpy as np

from . import families
from statsmodels.tools.decorators import (cache_readonly,
                                          cached_data, cached_value)
from statsmodels.compat.pandas import Appender

import statsmodels.base.model as base
import statsmodels.regression.linear_model as lm
import statsmodels.base.wrapper as wrap
import statsmodels.regression._tools as reg_tools

from statsmodels.graphics._regressionplots_doc import (
    _plot_added_variable_doc,
    _plot_partial_residuals_doc,
    _plot_ceres_residuals_doc)

# need to import in module instead of lazily to copy `__doc__`
from . import _prediction as pred
from statsmodels.genmod._prediction import PredictionResults

from statsmodels.tools.sm_exceptions import (PerfectSeparationError,
                                             DomainWarning,
                                             HessianInversionWarning)
from numpy.linalg import LinAlgError

__all__ = ['GLM', 'PredictionResults']


def _check_convergence(criterion, iteration, atol, rtol):
    return np.allclose(criterion[iteration], criterion[iteration + 1],
                       atol=atol, rtol=rtol)


class GLM(base.LikelihoodModel):
    __doc__ = """
    Generalized Linear Models

    GLM inherits from statsmodels.base.model.LikelihoodModel

    Parameters
    ----------
    endog : array_like
        1d array of endogenous response variable. This array can be 1d or 2d.
        Binomial family models accept a 2d array with two columns. If
        supplied, each observation is expected to be [success, failure].
    exog : array_like
        A nobs x k array where `nobs` is the number of observations and `k`
        is the number of regressors. An intercept is not included by default
        and should be added by the user (models specified using a formula
        include an intercept by default). See `statsmodels.tools.add_constant`.
    family : family class instance
        The default is Gaussian. To specify the binomial distribution use
        family = sm.families.Binomial()
        Each family can take a link instance as an argument. See
        statsmodels.genmod.families.family for more information.
    offset : array_like or None
        An offset to be included in the model. If provided, must be
        an array whose length is the number of rows in exog.
    exposure : array_like or None
        Log(exposure) will be added to the linear prediction in the model.
        Exposure is only valid if the log link is used. If provided, it must be
        an array with the same length as endog.
    freq_weights : array_like
        1d array of frequency weights. The default is None. If None is
        selected, the algorithm replaces it with an array of 1's with length
        equal to the endog.
        WARNING: Using weights is not verified yet for all possible options
        and results, see Notes.
    var_weights : array_like
        1d array of variance (analytic) weights. The default is None. If None
        is selected, the algorithm replaces it with an array of 1's with
        length equal to the endog.
        WARNING: Using weights is not verified yet for all possible options
        and results, see Notes.
    %(extra_params)s

    Attributes
    ----------
    df_model : float
        Model degrees of freedom is equal to p - 1, where p is the number
        of regressors. Note that the intercept is not reported as a
        degree of freedom.
    df_resid : float
        Residual degrees of freedom is equal to the number of observations n
        minus the number of regressors p.
    endog : ndarray
        See Notes. Note that `endog` is a reference to the data so that if
        data is already an array and it is changed, then `endog` changes
        as well.
    exposure : array_like
        Include ln(exposure) in model with coefficient constrained to 1. Can
        only be used if the link is the logarithm function.
    exog : ndarray
        See Notes. Note that `exog` is a reference to the data so that if
        data is already an array and it is changed, then `exog` changes
        as well.
    freq_weights : ndarray
        See Notes. Note that `freq_weights` is a reference to the data so that
        if data is already an array and it is changed, then `freq_weights`
        changes as well.
    var_weights : ndarray
        See Notes. Note that `var_weights` is a reference to the data so that
        if data is already an array and it is changed, then `var_weights`
        changes as well.
    iteration : int
        The number of iterations that fit has run. Initialized at 0.
    family : family class instance
        The distribution family of the model. Can be any family in
        statsmodels.families. Default is Gaussian.
    mu : ndarray
        The mean response of the transformed variable. `mu` is the value of
        the inverse of the link function at lin_pred, where lin_pred is the
        linear predicted value of the WLS fit of the transformed variable.
        `mu` is only available after fit is called. See
        statsmodels.families.family.fitted of the distribution family for more
        information.
    n_trials : ndarray
        See Notes. Note that `n_trials` is a reference to the data so that if
        data is already an array and it is changed, then `n_trials` changes
        as well. `n_trials` is the number of binomial trials and only available
        with that distribution. See statsmodels.families.Binomial for more
        information.
    normalized_cov_params : ndarray
        The p x p normalized covariance of the design / exogenous data.
        This is approximately equal to (X.T X)^(-1)
    offset : array_like
        Include offset in model with coefficient constrained to 1.
    scale : float
        The estimate of the scale / dispersion of the model fit. Only
        available after fit is called. See GLM.fit and GLM.estimate_scale
        for more information.
    scaletype : str
        The scaling used for fitting the model. This is only available after
        fit is called. The default is None. See GLM.fit for more information.
    weights : ndarray
        The value of the weights after the last iteration of fit. Only
        available after fit is called. See statsmodels.families.family for
        the specific distribution weighting functions.

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> data = sm.datasets.scotland.load(as_pandas=False)
    >>> data.exog = sm.add_constant(data.exog)

    Instantiate a gamma family model with the default link function.

    >>> gamma_model = sm.GLM(data.endog, data.exog,
    ...                      family=sm.families.Gamma())

    >>> gamma_results = gamma_model.fit()
    >>> gamma_results.params
    array([-0.01776527,  0.00004962,  0.00203442, -0.00007181,  0.00011185,
           -0.00000015, -0.00051868, -0.00000243])
    >>> gamma_results.scale
    0.0035842831734919055
    >>> gamma_results.deviance
    0.087388516416999198
    >>> gamma_results.pearson_chi2
    0.086022796163805704
    >>> gamma_results.llf
    -83.017202161073527

    See Also
    --------
    statsmodels.genmod.families.family.Family
    :ref:`families`
    :ref:`links`

    Notes
    -----
    Only the following combinations make sense for family and link:

    ============= ===== === ===== ====== ======= === ==== ====== ====== ====
    Family        ident log logit probit cloglog pow opow nbinom loglog logc
    ============= ===== === ===== ====== ======= === ==== ====== ====== ====
    Gaussian      x     x   x     x      x       x   x    x      x
    inv Gaussian  x     x                        x
    binomial      x     x   x     x      x       x   x           x      x
    Poisson       x     x                        x
    neg binomial  x     x                        x        x
    gamma         x     x                        x
    Tweedie       x     x                        x
    ============= ===== === ===== ====== ======= === ==== ====== ====== ====

    Not all of these link functions are currently available.

    Endog and exog are references so that if the data they refer to are already
    arrays and these arrays are changed, endog and exog will change.

    statsmodels supports two separate definitions of weights: frequency weights
    and variance weights.

    Frequency weights produce the same results as repeating observations by the
    frequencies (if those are integers). Frequency weights will keep the number
    of observations consistent, but the degrees of freedom will change to
    reflect the new weights.

    Variance weights (referred to in other packages as analytic weights) are
    used when ``endog`` represents an average or mean. This relies on the
    assumption that the inverse variance scales proportionally to the
    weight--an observation that is deemed more credible should have less
    variance and therefore have more weight. For the ``Poisson`` family--which
    assumes that occurrences scale proportionally with time--a natural practice
    would be to use the amount of time as the variance weight and set ``endog``
    to be a rate (occurrences per period of time). The compound Poisson family
    ``Tweedie`` makes a similar assumption about the rate (or frequency) of
    occurrences having variance proportional to time.

    Both frequency and variance weights are verified for all basic results with
    nonrobust or heteroscedasticity robust ``cov_type``. Other robust
    covariance types have not yet been verified, and at least the small sample
    correction is currently not based on the correct total frequency count.

    Currently, the residuals are not weighted by frequency, although they may
    incorporate ``n_trials`` for ``Binomial`` and ``var_weights``:

    +---------------+----------------------------------+
    | Residual Type | Applicable weights               |
    +===============+==================================+
    | Anscombe      | ``var_weights``                  |
    +---------------+----------------------------------+
    | Deviance      | ``var_weights``                  |
    +---------------+----------------------------------+
    | Pearson       | ``var_weights`` and ``n_trials`` |
    +---------------+----------------------------------+
    | Response      | ``n_trials``                     |
    +---------------+----------------------------------+
    | Working       | ``n_trials``                     |
    +---------------+----------------------------------+

    WARNING: Loglikelihood and deviance are not valid in models where
    scale is equal to 1 (i.e., ``Binomial``, ``NegativeBinomial``, and
    ``Poisson``). If variance weights are specified, then results such as
    ``loglike`` and ``deviance`` are based on a quasi-likelihood
    interpretation. The loglikelihood is not correctly specified in this case,
    and statistics based on it, such as AIC or likelihood ratio tests, are not
    appropriate.
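
    A minimal sketch of variance weights with a ``Poisson`` rate model (the
    data here are made up purely for illustration):

    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> time = np.array([1., 2., 4., 8., 16.])     # observation periods
    >>> counts = np.array([3., 5., 9., 17., 33.])  # event counts
    >>> rates = counts / time                      # endog as a rate
    >>> exog = sm.add_constant(np.arange(5.))
    >>> res = sm.GLM(rates, exog, family=sm.families.Poisson(),
    ...              var_weights=time).fit()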
    """ % {'extra_params': base._missing_param_doc}
    # Maximum number of endogenous variables when using a formula
    _formula_max_endog = 2

    def __init__(self, endog, exog, family=None, offset=None,
                 exposure=None, freq_weights=None, var_weights=None,
                 missing='none', **kwargs):

        if (family is not None) and not isinstance(family.link,
                                                   tuple(family.safe_links)):

            import warnings
            warnings.warn(("The %s link function does not respect the domain "
                           "of the %s family.") %
                          (family.link.__class__.__name__,
                           family.__class__.__name__),
                          DomainWarning)

        if exposure is not None:
            exposure = np.log(exposure)
        if offset is not None:  # this should probably be done upstream
            offset = np.asarray(offset)

        if freq_weights is not None:
            freq_weights = np.asarray(freq_weights)
        if var_weights is not None:
            var_weights = np.asarray(var_weights)

        self.freq_weights = freq_weights
        self.var_weights = var_weights

        super(GLM, self).__init__(endog, exog, missing=missing,
                                  offset=offset, exposure=exposure,
                                  freq_weights=freq_weights,
                                  var_weights=var_weights, **kwargs)
        self._check_inputs(family, self.offset, self.exposure, self.endog,
                           self.freq_weights, self.var_weights)
        if offset is None:
            delattr(self, 'offset')
        if exposure is None:
            delattr(self, 'exposure')

        self.nobs = self.endog.shape[0]

        # things to remove_data
        self._data_attr.extend(['weights', 'mu', 'freq_weights',
                                'var_weights', 'iweights', '_offset_exposure',
                                'n_trials'])
        # register kwds for __init__, offset and exposure are added by super
        self._init_keys.append('family')

        self._setup_binomial()
        # internal usage for recreating a model
        if 'n_trials' in kwargs:
            self.n_trials = kwargs['n_trials']

        # Construct a combined offset/exposure term.  Note that
        # exposure has already been logged if present.
        offset_exposure = 0.
        if hasattr(self, 'offset'):
            offset_exposure = self.offset
        if hasattr(self, 'exposure'):
            offset_exposure = offset_exposure + self.exposure
        self._offset_exposure = offset_exposure

        self.scaletype = None

    def initialize(self):
        """
        Initialize a generalized linear model.
        """
        self.df_model = np.linalg.matrix_rank(self.exog) - 1

        if (self.freq_weights is not None) and \
           (self.freq_weights.shape[0] == self.endog.shape[0]):
            self.wnobs = self.freq_weights.sum()
            self.df_resid = self.wnobs - self.df_model - 1
        else:
            self.wnobs = self.exog.shape[0]
            self.df_resid = self.exog.shape[0] - self.df_model - 1

    def _check_inputs(self, family, offset, exposure, endog, freq_weights,
                      var_weights):

        # Default family is Gaussian
        if family is None:
            family = families.Gaussian()
        self.family = family

        if exposure is not None:
            if not isinstance(self.family.link, families.links.Log):
                raise ValueError("exposure can only be used with the log "
                                 "link function")
            elif exposure.shape[0] != endog.shape[0]:
                raise ValueError("exposure is not the same length as endog")

        if offset is not None:
            if offset.shape[0] != endog.shape[0]:
                raise ValueError("offset is not the same length as endog")

        if freq_weights is not None:
            if freq_weights.shape[0] != endog.shape[0]:
                raise ValueError("freq weights not the same length as endog")
            if len(freq_weights.shape) > 1:
                raise ValueError("freq weights has too many dimensions")

        # internal flag to store whether freq_weights were not None
        self._has_freq_weights = (self.freq_weights is not None)
        if self.freq_weights is None:
            self.freq_weights = np.ones((endog.shape[0]))
            # TODO: check do we want to keep None as sentinel for freq_weights
        if np.shape(self.freq_weights) == () and self.freq_weights > 1:
            self.freq_weights = (self.freq_weights *
                                 np.ones((endog.shape[0])))

        if var_weights is not None:
            if var_weights.shape[0] != endog.shape[0]:
                raise ValueError("var weights not the same length as endog")
            if len(var_weights.shape) > 1:
                raise ValueError("var weights has too many dimensions")

        # internal flag to store whether var_weights were not None
        self._has_var_weights = (var_weights is not None)
        if var_weights is None:
            self.var_weights = np.ones((endog.shape[0]))
            # TODO: check do we want to keep None as sentinel for var_weights
        self.iweights = np.asarray(self.freq_weights * self.var_weights)

    def _get_init_kwds(self):
        # this is a temporary fixup because exposure has been transformed
        # see #1609, copied from discrete_model.CountModel
        kwds = super(GLM, self)._get_init_kwds()
        if 'exposure' in kwds and kwds['exposure'] is not None:
            kwds['exposure'] = np.exp(kwds['exposure'])
        return kwds

    def loglike_mu(self, mu, scale=1.):
        """
        Evaluate the log-likelihood for a generalized linear model.
        """
        return self.family.loglike(self.endog, mu, self.var_weights,
                                   self.freq_weights, scale)

    def loglike(self, params, scale=None):
        """
        Evaluate the log-likelihood for a generalized linear model.
        """
        lin_pred = np.dot(self.exog, params) + self._offset_exposure
        expval = self.family.link.inverse(lin_pred)
        if scale is None:
            scale = self.estimate_scale(expval)
        llf = self.family.loglike(self.endog, expval, self.var_weights,
                                  self.freq_weights, scale)
        return llf

    def score_obs(self, params, scale=None):
        """score, first derivative of the loglikelihood for each observation.

        Parameters
        ----------
        params : ndarray
            parameter at which score is evaluated
        scale : None or float
            If scale is None, then the default scale will be calculated.
            Default scale is defined by `self.scaletype` and set in fit.
            If scale is not None, then it is used as a fixed scale.

        Returns
        -------
        score_obs : ndarray, 2d
            The first derivative of the loglikelihood function evaluated at
            params for each observation.
        """

        score_factor = self.score_factor(params, scale=scale)
        return score_factor[:, None] * self.exog

    def score(self, params, scale=None):
        """score, first derivative of the loglikelihood function

        Parameters
        ----------
        params : ndarray
            parameter at which score is evaluated
        scale : None or float
            If scale is None, then the default scale will be calculated.
            Default scale is defined by `self.scaletype` and set in fit.
            If scale is not None, then it is used as a fixed scale.

        Returns
        -------
        score : ndarray_1d
            The first derivative of the loglikelihood function calculated as
            the sum of `score_obs`
        """
        score_factor = self.score_factor(params, scale=scale)
        return np.dot(score_factor, self.exog)

    def score_factor(self, params, scale=None):
        """weights for score for each observation

        This can be considered as score residuals.

        Parameters
        ----------
        params : ndarray
            parameter at which score is evaluated
        scale : None or float
            If scale is None, then the default scale will be calculated.
            Default scale is defined by `self.scaletype` and set in fit.
            If scale is not None, then it is used as a fixed scale.

        Returns
        -------
        score_factor : ndarray_1d
            A 1d weight vector used in the calculation of the score_obs.
            The score_obs are obtained by `score_factor[:, None] * exog`
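
        A minimal sketch of the relationship (toy data, for illustration
        only):

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> x = sm.add_constant(np.linspace(0, 1, 20))
        >>> y = np.array([0., 1., 2.] * 6 + [1., 2.])
        >>> mod = sm.GLM(y, x, family=sm.families.Poisson())
        >>> params = np.array([0.1, 0.5])
        >>> sf = mod.score_factor(params)
        >>> np.allclose(mod.score(params), (sf[:, None] * mod.exog).sum(0))
        True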
        """
        mu = self.predict(params)
        if scale is None:
            scale = self.estimate_scale(mu)

        score_factor = (self.endog - mu) / self.family.link.deriv(mu)
        score_factor /= self.family.variance(mu)
        score_factor *= self.iweights * self.n_trials

        if not scale == 1:
            score_factor /= scale

        return score_factor

    def hessian_factor(self, params, scale=None, observed=True):
        """Weights for calculating Hessian

        Parameters
        ----------
        params : ndarray
            parameter at which Hessian is evaluated
        scale : None or float
            If scale is None, then the default scale will be calculated.
            Default scale is defined by `self.scaletype` and set in fit.
            If scale is not None, then it is used as a fixed scale.
        observed : bool
            If True, then the observed Hessian is returned. If false then the
            expected information matrix is returned.

        Returns
        -------
        hessian_factor : ndarray, 1d
            A 1d weight vector used in the calculation of the Hessian.
            The hessian is obtained by `-(exog.T * hessian_factor).dot(exog)`
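
        A minimal sketch of the relationship (continuing the toy Poisson
        model sketched in `score_factor`; for illustration only):

        >>> hf = mod.hessian_factor(params)
        >>> np.allclose(mod.hessian(params),
        ...             -(mod.exog.T * hf).dot(mod.exog))
        True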
        """

        # calculating eim_factor
        mu = self.predict(params)
        if scale is None:
            scale = self.estimate_scale(mu)

        eim_factor = 1 / (self.family.link.deriv(mu)**2 *
                          self.family.variance(mu))
        eim_factor *= self.iweights * self.n_trials

        if not observed:
            if not scale == 1:
                eim_factor /= scale
            return eim_factor

        # calculating oim_factor, eim_factor is with scale=1

        score_factor = self.score_factor(params, scale=1.)
        if eim_factor.ndim > 1 or score_factor.ndim > 1:
            raise RuntimeError('something wrong')

        tmp = self.family.variance(mu) * self.family.link.deriv2(mu)
        tmp += self.family.variance.deriv(mu) * self.family.link.deriv(mu)

        tmp = score_factor * tmp
        # correct for duplicate iweights in oim_factor and score_factor
        tmp /= self.iweights * self.n_trials
        oim_factor = eim_factor * (1 + tmp)

        if tmp.ndim > 1:
            raise RuntimeError('something wrong')

        if not scale == 1:
            oim_factor /= scale

        return oim_factor

    def hessian(self, params, scale=None, observed=None):
        """Hessian, second derivative of loglikelihood function

        Parameters
        ----------
        params : ndarray
            parameter at which Hessian is evaluated
        scale : None or float
            If scale is None, then the default scale will be calculated.
            Default scale is defined by `self.scaletype` and set in fit.
            If scale is not None, then it is used as a fixed scale.
        observed : bool
            If True, then the observed Hessian is returned (default).
            If false then the expected information matrix is returned.

        Returns
        -------
        hessian : ndarray
            Hessian, i.e. observed information, or expected information matrix.
        """
        if observed is None:
            if getattr(self, '_optim_hessian', None) == 'eim':
                observed = False
            else:
                observed = True

        tmp = getattr(self, '_tmp_like_exog', np.empty_like(self.exog))

        factor = self.hessian_factor(params, scale=scale, observed=observed)
        np.multiply(self.exog.T, factor, out=tmp.T)
        return -tmp.T.dot(self.exog)

    def information(self, params, scale=None):
        """
        Fisher information matrix.
        """
        return self.hessian(params, scale=scale, observed=False)

    def _deriv_mean_dparams(self, params):
        """
        Derivative of the expected endog with respect to the parameters.

        Parameters
        ----------
        params : ndarray
            parameter at which score is evaluated

        Returns
        -------
        The value of the derivative of the expected endog with respect
        to the parameter vector.
        """
        lin_pred = self.predict(params, linear=True)
        idl = self.family.link.inverse_deriv(lin_pred)
        dmat = self.exog * idl[:, None]
        return dmat

    def _deriv_score_obs_dendog(self, params, scale=None):
        """derivative of score_obs w.r.t. endog

        Parameters
        ----------
        params : ndarray
            parameter at which score is evaluated
        scale : None or float
            If scale is None, then the default scale will be calculated.
            Default scale is defined by `self.scaletype` and set in fit.
            If scale is not None, then it is used as a fixed scale.

        Returns
        -------
        derivative : ndarray_2d
            The derivative of the score_obs with respect to endog. This
            is given by `score_factor0[:, None] * exog` where
            `score_factor0` is the score_factor without the residual.
        """
        mu = self.predict(params)
        if scale is None:
            scale = self.estimate_scale(mu)

        score_factor = 1 / self.family.link.deriv(mu)
        score_factor /= self.family.variance(mu)
        score_factor *= self.iweights * self.n_trials

        if not scale == 1:
            score_factor /= scale

        return score_factor[:, None] * self.exog

    def score_test(self, params_constrained, k_constraints=None,
                   exog_extra=None, observed=True):
        """score test for restrictions or for omitted variables

        The covariance matrix for the score is based on the Hessian, i.e.
        observed information matrix or optionally on the expected information
        matrix.

        Parameters
        ----------
        params_constrained : array_like
            estimated parameter of the restricted model. This can be the
            parameter estimate for the current model when testing for omitted
            variables.
        k_constraints : int or None
            Number of constraints that were used in the estimation of params
            restricted relative to the number of exog in the model.
            This must be provided if no exog_extra are given. If exog_extra is
            not None, then k_constraints is assumed to be zero if it is None.
        exog_extra : None or array_like
            Explanatory variables that are jointly tested for inclusion in the
            model, i.e. omitted variables.
        observed : bool
            If True, then the observed Hessian is used in calculating the
            covariance matrix of the score. If false then the expected
            information matrix is used.

        Returns
        -------
        chi2_stat : float
            chisquare statistic for the score test
        p-value : float
            P-value of the score test based on the chisquare distribution.
        df : int
            Degrees of freedom used in the p-value calculation. This is equal
            to the number of constraints.

        Notes
        -----
        not yet verified for case with scale not equal to 1.
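
        A minimal sketch of testing omitted variables (toy data, for
        illustration only):

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> np.random.seed(0)
        >>> x = sm.add_constant(np.random.uniform(size=(100, 1)))
        >>> x_extra = np.random.uniform(size=(100, 1))
        >>> y = np.random.poisson(np.exp(x[:, 1]))
        >>> res = sm.GLM(y, x, family=sm.families.Poisson()).fit()
        >>> chi2, pval, df = res.model.score_test(res.params,
        ...                                       exog_extra=x_extra)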
        """

        if exog_extra is None:
            if k_constraints is None:
                raise ValueError('if exog_extra is None, then k_constraints '
                                 'needs to be given')

            score = self.score(params_constrained)
            hessian = self.hessian(params_constrained, observed=observed)

        else:
            # exog_extra = np.asarray(exog_extra)
            if k_constraints is None:
                k_constraints = 0

            ex = np.column_stack((self.exog, exog_extra))
            k_constraints += ex.shape[1] - self.exog.shape[1]

            score_factor = self.score_factor(params_constrained)
            score = (score_factor[:, None] * ex).sum(0)
            hessian_factor = self.hessian_factor(params_constrained,
                                                 observed=observed)
            hessian = -np.dot(ex.T * hessian_factor, ex)

        from scipy import stats
        # TODO check sign, why minus?
        chi2stat = -score.dot(np.linalg.solve(hessian, score[:, None]))
        pval = stats.chi2.sf(chi2stat, k_constraints)
        # return a stats results instance instead?  Contrast?
        return chi2stat, pval, k_constraints

    def _update_history(self, tmp_result, mu, history):
        """
        Helper method to update history during iterative fit.
        """
        history['params'].append(tmp_result.params)
        history['deviance'].append(self.family.deviance(self.endog, mu,
                                                        self.var_weights,
                                                        self.freq_weights,
                                                        self.scale))
        return history

    def estimate_scale(self, mu):
        """
        Estimate the dispersion/scale.

        Type of scale can be chosen in the fit method.

        Parameters
        ----------
        mu : ndarray
            mu is the mean response estimate

        Returns
        -------
        Estimate of scale

        Notes
        -----
        The default scale for Binomial, Poisson and Negative Binomial
        families is 1. The default for the other families is Pearson's
        Chi-Square estimate.

        See Also
        --------
        statsmodels.genmod.generalized_linear_model.GLM.fit
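
        A minimal sketch of the effect of the scale type chosen in `fit`
        (toy data, for illustration only):

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> x = sm.add_constant(np.linspace(1, 2, 30))
        >>> y = np.exp(np.linspace(0, 1, 30)) + 0.1
        >>> fam = sm.families.Gamma()
        >>> res_x2 = sm.GLM(y, x, family=fam).fit(scale='X2')
        >>> res_dev = sm.GLM(y, x, family=fam).fit(scale='dev')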
        """
        if not self.scaletype:
            if isinstance(self.family, (families.Binomial, families.Poisson,
                                        families.NegativeBinomial)):
                return 1.
            else:
                return self._estimate_x2_scale(mu)

        if isinstance(self.scaletype, float):
            return np.array(self.scaletype)

        if isinstance(self.scaletype, str):
            if self.scaletype.lower() == 'x2':
                return self._estimate_x2_scale(mu)
            elif self.scaletype.lower() == 'dev':
                return (self.family.deviance(self.endog, mu, self.var_weights,
                                             self.freq_weights, 1.) /
                        (self.df_resid))
            else:
                raise ValueError("Scale %s with type %s not understood" %
                                 (self.scaletype, type(self.scaletype)))
        else:
            raise ValueError("Scale %s with type %s not understood" %
                             (self.scaletype, type(self.scaletype)))

    def _estimate_x2_scale(self, mu):
        resid = np.power(self.endog - mu, 2) * self.iweights
        return np.sum(resid / self.family.variance(mu)) / self.df_resid

    def estimate_tweedie_power(self, mu, method='brentq', low=1.01, high=5.):
        """
        Tweedie specific function to estimate scale and the variance parameter.
        The variance parameter is also referred to as p, xi, or shape.

        Parameters
        ----------
        mu : array_like
            Fitted mean response variable
        method : str, defaults to 'brentq'
            Scipy optimizer used to solve the Pearson equation. Only brentq
            currently supported.
        low : float, optional
            Low end of the bracketing interval [a,b] to be used in the search
            for the power. Defaults to 1.01.
        high : float, optional
            High end of the bracketing interval [a,b] to be used in the search
            for the power. Defaults to 5.

        Returns
        -------
        power : float
            The estimated shape or power.
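
        A minimal sketch on simulated compound-Poisson data (for
        illustration only; the bracketing interval may need adjustment
        for other data):

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> np.random.seed(1234)
        >>> x = sm.add_constant(np.random.uniform(size=(200, 1)))
        >>> n = np.random.poisson(2., size=200)
        >>> y = np.array([np.random.gamma(2., 1., size=k).sum() for k in n])
        >>> fam = sm.families.Tweedie(var_power=1.5)
        >>> res = sm.GLM(y, x, family=fam).fit()
        >>> p = res.model.estimate_tweedie_power(res.fittedvalues)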
        """
        if method == 'brentq':
            from scipy.optimize import brentq

            def psi_p(power, mu):
                scale = ((self.iweights * (self.endog - mu) ** 2 /
                          (mu ** power)).sum() / self.df_resid)
                return (np.sum(self.iweights * ((self.endog - mu) ** 2 /
                               (scale * (mu ** power)) - 1) *
                               np.log(mu)) / self.freq_weights.sum())
            power = brentq(psi_p, low, high, args=(mu,))
        else:
            raise NotImplementedError('Only brentq can currently be used')
        return power

    def predict(self, params, exog=None, exposure=None, offset=None,
                linear=False):
        """
        Return predicted values for a design matrix

        Parameters
        ----------
        params : array_like
            Parameters / coefficients of a GLM.
        exog : array_like, optional
            Design / exogenous data. If exog is None, model exog is used.
        exposure : array_like, optional
            Exposure time values, only can be used with the log link
            function. See notes for details.
        offset : array_like, optional
            Offset values. See notes for details.
        linear : bool
            If True, returns the linear predicted values. If False,
            returns the value of the inverse of the model's link function at
            the linear predicted values.

        Returns
        -------
        An array of fitted values

        Notes
        -----
        Any `exposure` and `offset` provided here take precedence over
        the `exposure` and `offset` used in the model fit. If `exog`
        is passed as an argument here, then any `exposure` and
        `offset` values in the fit will be ignored.

        Exposure values must be strictly positive.
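
        A minimal sketch of this precedence (toy data, for illustration
        only):

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> x = sm.add_constant(np.arange(5.))
        >>> y = np.array([1., 0., 2., 3., 5.])
        >>> expos = np.array([1., 2., 1., 2., 1.])
        >>> res = sm.GLM(y, x, family=sm.families.Poisson(),
        ...              exposure=expos).fit()
        >>> m1 = res.model.predict(res.params)          # uses fit exposure
        >>> m2 = res.model.predict(res.params, exog=x)  # exposure ignored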
        """

        # Use fit offset if appropriate
        if offset is None and exog is None and hasattr(self, 'offset'):
            offset = self.offset
        elif offset is None:
            offset = 0.

        if exposure is not None and not isinstance(self.family.link,
                                                   families.links.Log):
            raise ValueError("exposure can only be used with the log link "
                             "function")

        # Use fit exposure if appropriate
        if exposure is None and exog is None and hasattr(self, 'exposure'):
            # Already logged
            exposure = self.exposure
        elif exposure is None:
            exposure = 0.
        else:
            exposure = np.log(exposure)

        if exog is None:
            exog = self.exog

        linpred = np.dot(exog, params) + offset + exposure
        if linear:
            return linpred
        else:
            return self.family.fitted(linpred)

    def get_distribution(self, params, scale=1, exog=None, exposure=None,
                         offset=None):
        """
        Return a random number generator for the predictive distribution.

        Parameters
        ----------
        params : array_like
            The model parameters.
        scale : scalar
            The scale parameter.
        exog : array_like
            The predictor variable matrix.

        Returns
        -------
        gen
            Frozen random number generator object. Use the ``rvs`` method to
            generate random values.

        Notes
        -----
        Due to the behavior of ``scipy.stats.distributions`` objects, the
        returned random number generator must be called with ``gen.rvs(n)``
        where ``n`` is the number of observations in the data set used
        to fit the model. If any other value is used for ``n``, misleading
        results will be produced.
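
        A minimal sketch (toy Poisson model, for illustration only):

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> np.random.seed(0)
        >>> x = sm.add_constant(np.arange(10.))
        >>> y = np.random.poisson(2., size=10)
        >>> res = sm.GLM(y, x, family=sm.families.Poisson()).fit()
        >>> gen = res.model.get_distribution(res.params)
        >>> sample = gen.rvs(len(y))  # n must match the number of observations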
        """

        fit = self.predict(params, exog, exposure, offset, linear=False)

        import scipy.stats.distributions as dist

        if isinstance(self.family, families.Gaussian):
            return dist.norm(loc=fit, scale=np.sqrt(scale))

        elif isinstance(self.family, families.Binomial):
            return dist.binom(n=1, p=fit)

        elif isinstance(self.family, families.Poisson):
            return dist.poisson(mu=fit)

        elif isinstance(self.family, families.Gamma):
            alpha = fit / float(scale)
            return dist.gamma(alpha, scale=scale)

        else:
            raise ValueError("get_distribution not implemented for %s" %
                             self.family.name)

    def _setup_binomial(self):
        # this checks what kind of data is given for Binomial.
        # family will need a reference to endog if this is to be removed from
        # preprocessing
        self.n_trials = np.ones((self.endog.shape[0]))  # For binomial
        if isinstance(self.family, families.Binomial):
            tmp = self.family.initialize(self.endog, self.freq_weights)
            self.endog = tmp[0]
            self.n_trials = tmp[1]
            self._init_keys.append('n_trials')

    def fit(self, start_params=None, maxiter=100, method='IRLS', tol=1e-8,
            scale=None, cov_type='nonrobust', cov_kwds=None, use_t=None,
            full_output=True, disp=False, max_start_irls=3, **kwargs):
        """
        Fits a generalized linear model for a given family.

        Parameters
        ----------
        start_params : array_like, optional
            Initial guess of the solution for the loglikelihood maximization.
            The default is family-specific and is given by
            ``family.starting_mu(endog)``. If start_params is given then the
            initial mean will be calculated as ``np.dot(exog, start_params)``.
        maxiter : int, optional
            Default is 100.
        method : str
            Default is 'IRLS' for iteratively reweighted least squares.
            Otherwise gradient optimization is used.
        tol : float
            Convergence tolerance. Default is 1e-8.
        scale : str or float, optional
            `scale` can be 'X2', 'dev', or a float
            The default value is None, which uses `X2` for Gamma, Gaussian,
            and Inverse Gaussian.
            `X2` is Pearson's chi-square divided by `df_resid`.
            The default is 1 for the Binomial and Poisson families.
            `dev` is the deviance divided by df_resid
        cov_type : str
            The type of parameter estimate covariance matrix to compute.
        cov_kwds : dict-like
            Extra arguments for calculating the covariance of the parameter
            estimates.
        use_t : bool
            If True, the Student t-distribution is used for inference.
        full_output : bool, optional
            Set to True to have all available output in the Results object's
            mle_retvals attribute. The output is dependent on the solver.
            See LikelihoodModelResults notes section for more information.
            Not used if method is IRLS.
        disp : bool, optional
            Set to True to print convergence messages. Not used if method is
            IRLS.
        max_start_irls : int
            The number of IRLS iterations used to obtain starting
            values for gradient optimization. Only relevant if
            `method` is set to something other than 'IRLS'.
        atol : float, optional
            (available with IRLS fits) The absolute tolerance criterion that
            must be satisfied. Defaults to ``tol``. Convergence is attained
            when: :math:`rtol * prior + atol > abs(current - prior)`
        rtol : float, optional
            (available with IRLS fits) The relative tolerance criterion that
            must be satisfied. Defaults to 0 which means ``rtol`` is not used.
            Convergence is attained when:
            :math:`rtol * prior + atol > abs(current - prior)`
        tol_criterion : str, optional
            (available with IRLS fits) Defaults to ``'deviance'``. Can
            optionally be ``'params'``.
        wls_method : str, optional
            (available with IRLS fits) options are 'lstsq', 'pinv' and 'qr'
            specifies which linear algebra function to use for the irls
            optimization. Default is `lstsq` which uses the same underlying
            svd based approach as 'pinv', but is faster during iterations.
            'lstsq' and 'pinv' regularize the estimate in singular and
            near-singular cases by truncating small singular values based
            on `rcond` of the respective numpy.linalg function. 'qr' is
            only valid for cases that are not singular nor near-singular.
        optim_hessian : {'eim', 'oim'}, optional
            (available with scipy optimizer fits) When 'oim'--the default--the
            observed Hessian is used in fitting. 'eim' is the expected Hessian.
            This may provide more stable fits, but adds assumption that the
            Hessian is correctly specified.

        Notes
        -----
        If method is 'IRLS', then an additional keyword 'attach_wls' is
        available. This is currently for internal use only and might change
        in future versions. If `attach_wls` is true, then the final WLS
        instance of the IRLS iteration is attached to the results instance
        as `results_wls` attribute.
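
        A minimal sketch of the IRLS-specific keywords (toy data, for
        illustration only):

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> np.random.seed(0)
        >>> x = sm.add_constant(np.linspace(0, 1, 25))
        >>> y = np.random.poisson(np.exp(x[:, 1]))
        >>> model = sm.GLM(y, x, family=sm.families.Poisson())
        >>> res = model.fit(method='IRLS', atol=1e-10, rtol=0.,
        ...                 tol_criterion='params', wls_method='pinv')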
        """
        self.scaletype = scale

        if method.lower() == "irls":
            if cov_type.lower() == 'eim':
                cov_type = 'nonrobust'
            return self._fit_irls(start_params=start_params, maxiter=maxiter,
                                  tol=tol, scale=scale, cov_type=cov_type,
                                  cov_kwds=cov_kwds, use_t=use_t, **kwargs)
        else:
            self._optim_hessian = kwargs.get('optim_hessian')
            self._tmp_like_exog = np.empty_like(self.exog)
            fit_ = self._fit_gradient(start_params=start_params,
                                      method=method,
                                      maxiter=maxiter,
                                      tol=tol, scale=scale,
                                      full_output=full_output,
                                      disp=disp, cov_type=cov_type,
                                      cov_kwds=cov_kwds, use_t=use_t,
                                      max_start_irls=max_start_irls,
                                      **kwargs)
            del self._optim_hessian
            del self._tmp_like_exog
            return fit_

    def _fit_gradient(self, start_params=None, method="newton",
                      maxiter=100, tol=1e-8, full_output=True,
                      disp=True, scale=None, cov_type='nonrobust',
                      cov_kwds=None, use_t=None, max_start_irls=3,
                      **kwargs):
        """
        Fits a generalized linear model for a given family iteratively
        using the scipy gradient optimizers.
        """

        # fix scale during optimization, see #4616
        scaletype = self.scaletype
        self.scaletype = 1.

        if (max_start_irls > 0) and (start_params is None):
            irls_rslt = self._fit_irls(start_params=start_params,
                                       maxiter=max_start_irls,
                                       tol=tol, scale=1., cov_type='nonrobust',
                                       cov_kwds=None, use_t=None,
                                       **kwargs)
            start_params = irls_rslt.params
            del irls_rslt

        rslt = super(GLM, self).fit(start_params=start_params, tol=tol,
                                    maxiter=maxiter, full_output=full_output,
                                    method=method, disp=disp, **kwargs)

        # reset scaletype to original
        self.scaletype = scaletype

        mu = self.predict(rslt.params)
        scale = self.estimate_scale(mu)

        if rslt.normalized_cov_params is None:
            cov_p = None
        else:
            cov_p = rslt.normalized_cov_params / scale

        if cov_type.lower() == 'eim':
            oim = False
            cov_type = 'nonrobust'
        else:
            oim = True

        try:
            cov_p = np.linalg.inv(-self.hessian(rslt.params,
                                                observed=oim)) / scale
        except LinAlgError:
            from warnings import warn
            warn('Inverting hessian failed, no bse or cov_params '
                 'available', HessianInversionWarning)
            cov_p = None

        results_class = getattr(self, '_results_class', GLMResults)
        results_class_wrapper = getattr(self, '_results_class_wrapper',
                                        GLMResultsWrapper)
        glm_results = results_class(self, rslt.params,
                                    cov_p,
                                    scale,
                                    cov_type=cov_type, cov_kwds=cov_kwds,
                                    use_t=use_t)

        # TODO: iteration count is not always available
        history = {'iteration': 0}
        if full_output:
            glm_results.mle_retvals = rslt.mle_retvals
            if 'iterations' in rslt.mle_retvals:
                history['iteration'] = rslt.mle_retvals['iterations']
        glm_results.method = method
        glm_results.fit_history = history

        return results_class_wrapper(glm_results)

    def _fit_irls(self, start_params=None, maxiter=100, tol=1e-8,
                  scale=None, cov_type='nonrobust', cov_kwds=None,
                  use_t=None, **kwargs):
        """
        Fits a generalized linear model for a given family using
        iteratively reweighted least squares (IRLS).
        """
        attach_wls = kwargs.pop('attach_wls', False)
        atol = kwargs.get('atol')
        rtol = kwargs.get('rtol', 0.)
        tol_criterion = kwargs.get('tol_criterion', 'deviance')
        wls_method = kwargs.get('wls_method', 'lstsq')
        atol = tol if atol is None else atol

        endog = self.endog
        wlsexog = self.exog
        if start_params is None:
            start_params = np.zeros(self.exog.shape[1], float)
            mu = self.family.starting_mu(self.endog)
            lin_pred = self.family.predict(mu)
        else:
            lin_pred = np.dot(wlsexog, start_params) + self._offset_exposure
            mu = self.family.fitted(lin_pred)
        self.scale = self.estimate_scale(mu)
        dev = self.family.deviance(self.endog, mu, self.var_weights,
                                   self.freq_weights, self.scale)
        if np.isnan(dev):
            raise ValueError("The first guess on the deviance function "
                             "returned a nan. This could be a boundary "
                             "problem and should be reported.")

        # first guess on the deviance is assumed to be scaled by 1.
        # params are none to start, so they line up with the deviance
        history = dict(params=[np.inf, start_params], deviance=[np.inf, dev])
        converged = False
        criterion = history[tol_criterion]
        # This special case is used to get the likelihood for a specific
        # params vector.
        if maxiter == 0:
            mu = self.family.fitted(lin_pred)
            self.scale = self.estimate_scale(mu)
            wls_results = lm.RegressionResults(self, start_params, None)
            iteration = 0
        for iteration in range(maxiter):
            self.weights = (self.iweights * self.n_trials *
                            self.family.weights(mu))
            wlsendog = (lin_pred + self.family.link.deriv(mu) * (self.endog-mu)
                        - self._offset_exposure)
            wls_mod = reg_tools._MinimalWLS(wlsendog, wlsexog,
                                            self.weights, check_endog=True,
                                            check_weights=True)
            wls_results = wls_mod.fit(method=wls_method)
            lin_pred = np.dot(self.exog, wls_results.params)
            lin_pred += self._offset_exposure
            mu = self.family.fitted(lin_pred)
            history = self._update_history(wls_results, mu, history)
            self.scale = self.estimate_scale(mu)
            if endog.squeeze().ndim == 1 and np.allclose(mu - endog, 0):
                msg = "Perfect separation detected, results not available"
                raise PerfectSeparationError(msg)
            converged = _check_convergence(criterion, iteration + 1, atol,
                                           rtol)
            if converged:
                break
        self.mu = mu

        if maxiter > 0:  # Only if iterative used
            wls_method2 = 'pinv' if wls_method == 'lstsq' else wls_method
            wls_model = lm.WLS(wlsendog, wlsexog, self.weights)
            wls_results = wls_model.fit(method=wls_method2)

        glm_results = GLMResults(self, wls_results.params,
                                 wls_results.normalized_cov_params,
                                 self.scale,
                                 cov_type=cov_type, cov_kwds=cov_kwds,
                                 use_t=use_t)

        glm_results.method = "IRLS"
        glm_results.mle_settings = {}
        glm_results.mle_settings['wls_method'] = wls_method
        glm_results.mle_settings['optimizer'] = glm_results.method
        if (maxiter > 0) and (attach_wls is True):
            glm_results.results_wls = wls_results
        history['iteration'] = iteration + 1
        glm_results.fit_history = history
        glm_results.converged = converged
        return GLMResultsWrapper(glm_results)

    def fit_regularized(self, method="elastic_net", alpha=0.,
                        start_params=None, refit=False,
                        opt_method="bfgs", **kwargs):
        r"""
        Return a regularized fit to a linear regression model.

        Parameters
        ----------
        method : {'elastic_net'}
            Only the `elastic_net` approach is currently implemented.
        alpha : scalar or array_like
            The penalty weight. If a scalar, the same penalty weight
            applies to all variables in the model. If a vector, it
            must have the same length as `params`, and contains a
            penalty weight for each coefficient.
        start_params : array_like
            Starting values for `params`.
        refit : bool
            If True, the model is refit using only the variables that
            have non-zero coefficients in the regularized fit. The
            refitted model is not regularized.
        opt_method : string
            The method used for numerical optimization.
        **kwargs
            Additional keyword arguments used when fitting the model.

        Returns
        -------
        GLMResults
            An array or a GLMResults object, same type returned by `fit`.

        Notes
        -----
        The penalty is the ``elastic net`` penalty, which is a
        combination of L1 and L2 penalties.

        The function that is minimized is:

        .. math::

            -loglike/n + alpha*((1-L1\_wt)*|params|_2^2/2 + L1\_wt*|params|_1)

        where :math:`|*|_1` and :math:`|*|_2` are the L1 and L2 norms.

        Post-estimation results are based on the same data used to
        select variables, hence may be subject to overfitting biases.

        The elastic_net method uses the following keyword arguments:

        maxiter : int
            Maximum number of iterations
        L1_wt : float
            Must be in [0, 1]. The L1 penalty has weight L1_wt and the
            L2 penalty has weight 1 - L1_wt.
        cnvrg_tol : float
            Convergence threshold for line searches
        zero_tol : float
            Coefficients below this threshold are treated as zero.
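
        A minimal sketch of a lasso-type fit (toy data, for illustration
        only):

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> np.random.seed(0)
        >>> x = sm.add_constant(np.random.uniform(size=(100, 3)))
        >>> y = np.random.poisson(np.exp(1 + x[:, 1]))
        >>> model = sm.GLM(y, x, family=sm.families.Poisson())
        >>> res = model.fit_regularized(alpha=0.1, L1_wt=1.0)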
        """

        if kwargs.get("L1_wt", 1) == 0:
            return self._fit_ridge(alpha, start_params, opt_method)

        from statsmodels.base.elastic_net import fit_elasticnet

        if method != "elastic_net":
            raise ValueError("method for fit_regularized must be elastic_net")

        defaults = {"maxiter": 50, "L1_wt": 1, "cnvrg_tol": 1e-10,
                    "zero_tol": 1e-10}
        defaults.update(kwargs)

        result = fit_elasticnet(self, method=method,
                                alpha=alpha,
                                start_params=start_params,
                                refit=refit,
                                **defaults)

        self.mu = self.predict(result.params)
        self.scale = self.estimate_scale(self.mu)

        return result

    def _fit_ridge(self, alpha, start_params, method):

        if start_params is None:
            start_params = np.zeros(self.exog.shape[1])

        def fun(x):
            return -(self.loglike(x) / self.nobs - np.sum(alpha * x**2) / 2)

        def grad(x):
            return -(self.score(x) / self.nobs - alpha * x)

        from scipy.optimize import minimize
        from statsmodels.base.elastic_net import (RegularizedResults,
                                                  RegularizedResultsWrapper)

        mr = minimize(fun, start_params, jac=grad, method=method)
        params = mr.x

        if not mr.success:
            import warnings
            ngrad = np.sqrt(np.sum(mr.jac**2))
            msg = "GLM ridge optimization may have failed, |grad|=%f" % ngrad
            warnings.warn(msg)

        results = RegularizedResults(self, params)
        results = RegularizedResultsWrapper(results)

        return results

    def fit_constrained(self, constraints, start_params=None, **fit_kwds):
        """fit the model subject to linear equality constraints

        The constraints are of the form `R params = q`
        where R is the constraint_matrix and q is the vector of
        constraint_values.

        The estimation creates a new model with transformed design matrix,
        exog, and converts the results back to the original parameterization.

        Parameters
        ----------
        constraints : formula expression or tuple
            If it is a tuple, then the constraint needs to be given by two
            arrays (constraint_matrix, constraint_value), i.e. (R, q).
            Otherwise, the constraints can be given as strings or list of
            strings.
            see t_test for details
        start_params : None or array_like
            starting values for the optimization. `start_params` needs to be
            given in the original parameter space and are internally
            transformed.
        **fit_kwds : keyword arguments
            fit_kwds are used in the optimization of the transformed model.

        Returns
        -------
        results : Results instance
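
        A minimal sketch with a string constraint (toy data and column
        names, for illustration only):

        >>> import numpy as np
        >>> import pandas as pd
        >>> import statsmodels.api as sm
        >>> import statsmodels.formula.api as smf
        >>> np.random.seed(0)
        >>> df = pd.DataFrame({'y': np.random.poisson(3, size=50),
        ...                    'x1': np.random.uniform(size=50),
        ...                    'x2': np.random.uniform(size=50)})
        >>> model = smf.glm('y ~ x1 + x2', data=df,
        ...                 family=sm.families.Poisson())
        >>> res = model.fit_constrained('x1 = x2')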
        """

        from patsy import DesignInfo
        from statsmodels.base._constraints import fit_constrained

        # same pattern as in base.LikelihoodModel.t_test
        lc = DesignInfo(self.exog_names).linear_constraint(constraints)
        R, q = lc.coefs, lc.constants

        # TODO: add start_params option, need access to transformation
        # fit_constrained needs to do the transformation
        params, cov, res_constr = fit_constrained(self, R, q,
                                                  start_params=start_params,
                                                  fit_kwds=fit_kwds)
        # create dummy results instance, TODO: wire up properly
        res = self.fit(start_params=params, maxiter=0)  # we get a wrapper back
        res._results.params = params
        res._results.cov_params_default = cov
        cov_type = fit_kwds.get('cov_type', 'nonrobust')
        if cov_type != 'nonrobust':
            res._results.normalized_cov_params = cov / res_constr.scale
        else:
            res._results.normalized_cov_params = None
        res._results.scale = res_constr.scale
        k_constr = len(q)
        res._results.df_resid += k_constr
        res._results.df_model -= k_constr
        res._results.constraints = lc
        res._results.k_constr = k_constr
        res._results.results_constrained = res_constr
        return res


class GLMResults(base.LikelihoodModelResults):
    """
    Class to contain GLM results.

    GLMResults inherits from statsmodels.LikelihoodModelResults

    Attributes
    ----------
    df_model : float
        See GLM.df_model
    df_resid : float
        See GLM.df_resid
    fit_history : dict
        Contains information about the iterations. Its keys are `iterations`,
        `deviance` and `params`.
    model : class instance
        Pointer to GLM model instance that called fit.
    nobs : float
        The number of observations n.
    normalized_cov_params : ndarray
        See GLM docstring
    params : ndarray
        The coefficients of the fitted model. Note that interpretation
        of the coefficients often depends on the distribution family and the
        data.
    pvalues : ndarray
        The two-tailed p-values for the parameters.
    scale : float
        The estimate of the scale / dispersion for the model fit.
        See GLM.fit and GLM.estimate_scale for more information.
    stand_errors : ndarray
        The standard errors of the fitted GLM.  # TODO still named bse

    See Also
    --------
    statsmodels.base.model.LikelihoodModelResults
    """

    def __init__(self, model, params, normalized_cov_params, scale,
                 cov_type='nonrobust', cov_kwds=None, use_t=None):
        super(GLMResults, self).__init__(
            model,
            params,
            normalized_cov_params=normalized_cov_params,
            scale=scale)
        self.family = model.family
        self._endog = model.endog
        self.nobs = model.endog.shape[0]
        self._freq_weights = model.freq_weights
        self._var_weights = model.var_weights
        self._iweights = model.iweights
        if isinstance(self.family, families.Binomial):
            self._n_trials = self.model.n_trials
        else:
            self._n_trials = 1
        self.df_resid = model.df_resid
        self.df_model = model.df_model
        self._cache = {}
        # are these intermediate results needed or can we just
        # call the model's attributes?

        # for remove data and pickle without large arrays
        self._data_attr.extend(['results_constrained', '_freq_weights',
                                '_var_weights', '_iweights'])
        self.data_in_cache = getattr(self, 'data_in_cache', [])
        self.data_in_cache.extend(['null', 'mu'])
        self._data_attr_model = getattr(self, '_data_attr_model', [])
        self._data_attr_model.append('mu')

        # robust covariance
        from statsmodels.base.covtype import get_robustcov_results
        if use_t is None:
            self.use_t = False    # TODO: class default
        else:
            self.use_t = use_t

        # temporary warning
        ct = (cov_type == 'nonrobust') or (cov_type.upper().startswith('HC'))
        if self.model._has_freq_weights and not ct:
            import warnings
            from statsmodels.tools.sm_exceptions import SpecificationWarning
            warnings.warn('cov_type not fully supported with freq_weights',
                          SpecificationWarning)

        if self.model._has_var_weights and not ct:
            import warnings
            from statsmodels.tools.sm_exceptions import SpecificationWarning
            warnings.warn('cov_type not fully supported with var_weights',
                          SpecificationWarning)

        if cov_type == 'nonrobust':
            self.cov_type = 'nonrobust'
            self.cov_kwds = {'description': 'Standard Errors assume that the' +
                             ' covariance matrix of the errors is correctly ' +
                             'specified.'}

        else:
            if cov_kwds is None:
                cov_kwds = {}
            get_robustcov_results(self, cov_type=cov_type, use_self=True,
                                  use_t=use_t, **cov_kwds)

    @cached_data
    def resid_response(self):
        """
        Response residuals. The response residuals are defined as
        `endog` - `fittedvalues`
        """
        return self._n_trials * (self._endog - self.mu)

    @cached_data
    def resid_pearson(self):
        """
        Pearson residuals. The Pearson residuals are defined as
        (`endog` - `mu`)/sqrt(VAR(`mu`)) where VAR is the distribution
        specific variance function. See statsmodels.families.family and
        statsmodels.families.varfuncs for more information.
        """
        return (np.sqrt(self._n_trials) * (self._endog - self.mu) *
                np.sqrt(self._var_weights) /
                np.sqrt(self.family.variance(self.mu)))

    @cached_data
    def resid_working(self):
        """
        Working residuals. The working residuals are defined as
        `resid_response`/link'(`mu`). See statsmodels.family.links for the
        derivatives of the link functions. They are defined analytically.
        """
        # Isn't self.resid_response already adjusted by _n_trials?
        val = (self.resid_response * self.family.link.deriv(self.mu))
        val *= self._n_trials
        return val

    @cached_data
    def resid_anscombe(self):
        """
        Anscombe residuals. See statsmodels.families.family for distribution-
        specific Anscombe residuals. Currently, the unscaled residuals are
        provided. In a future version, the scaled residuals will be provided.
        """
        import warnings
        warnings.warn('Anscombe residuals currently unscaled. In a future '
                      'release, they will be scaled.', category=FutureWarning)
        return self.family.resid_anscombe(self._endog, self.fittedvalues,
                                          var_weights=self._var_weights,
                                          scale=1.)

    @cached_data
    def resid_anscombe_scaled(self):
        """
        Scaled Anscombe residuals. See statsmodels.families.family for
        distribution-specific Anscombe residuals.
        """
        return self.family.resid_anscombe(self._endog, self.fittedvalues,
                                          var_weights=self._var_weights,
                                          scale=self.scale)

    @cached_data
    def resid_anscombe_unscaled(self):
        """
        Unscaled Anscombe residuals. See statsmodels.families.family for
        distribution-specific Anscombe residuals.
        """
        return self.family.resid_anscombe(self._endog, self.fittedvalues,
                                          var_weights=self._var_weights,
                                          scale=1.)

    @cached_data
    def resid_deviance(self):
        """
        Deviance residuals. See statsmodels.families.family for distribution-
        specific deviance residuals.
        """
        dev = self.family.resid_dev(self._endog, self.fittedvalues,
                                    var_weights=self._var_weights,
                                    scale=1.)
        return dev

    @cached_value
    def pearson_chi2(self):
        """
        Pearson's Chi-Squared statistic is defined as the sum of the squares
        of the Pearson residuals.
        """
        chisq = (self._endog - self.mu)**2 / self.family.variance(self.mu)
        chisq *= self._iweights * self._n_trials
        chisqsum = np.sum(chisq)
        return chisqsum

    @cached_data
    def fittedvalues(self):
        """
        The estimated mean response; the inverse of the model's link function
        evaluated at the linear predicted values `dot(exog, params)`.
        """
        return self.mu

    @cached_data
    def mu(self):
        """
        See GLM docstring.
        """
        return self.model.predict(self.params)

    @cache_readonly
    def null(self):
        """
        Fitted values of the null model
        """
        endog = self._endog
        model = self.model
        exog = np.ones((len(endog), 1))

        kwargs = model._get_init_kwds()
        kwargs.pop('family')
        if hasattr(self, '_offset_exposure'):
            return GLM(endog, exog, family=self.family,
                       **kwargs).fit().fittedvalues
        else:
            # correct if fitted is identical across observations
            wls_model = lm.WLS(endog, exog,
                               weights=self._iweights * self._n_trials)
            return wls_model.fit().fittedvalues

    @cache_readonly
    def deviance(self):
        """
        See statsmodels.families.family for the distribution-specific deviance
        functions.
        """
        return self.family.deviance(self._endog, self.mu, self._var_weights,
                                    self._freq_weights)

    @cache_readonly
    def null_deviance(self):
        """The value of the deviance function for the model fit with a constant
        as the only regressor."""
        return self.family.deviance(self._endog, self.null, self._var_weights,
                                    self._freq_weights)

    @cache_readonly
    def llnull(self):
        """
        Log-likelihood of the model fit with a constant as the only regressor
        """
        return self.family.loglike(self._endog, self.null,
                                   var_weights=self._var_weights,
                                   freq_weights=self._freq_weights,
                                   scale=self.scale)

    @cached_value
    def llf(self):
        """
        Value of the loglikelihood function evaluated at params.
        See statsmodels.families.family for distribution-specific
        loglikelihoods.
        """
        _modelfamily = self.family
        if (isinstance(self.family, families.Gaussian) and
                isinstance(self.family.link, families.links.Power) and
                (self.family.link.power == 1.)):
            scale = (np.power(self._endog - self.mu, 2) * self._iweights).sum()
            scale /= self.model.wnobs
        else:
            scale = self.scale
        val = _modelfamily.loglike(self._endog, self.mu,
                                   var_weights=self._var_weights,
                                   freq_weights=self._freq_weights,
                                   scale=scale)
        return val
    @cached_value
    def aic(self):
        """
        Akaike Information Criterion
        -2 * `llf` + 2 * (`df_model` + 1)
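
        Examples
        --------
        The definition above, checked directly on the fitted ``res`` from
        the ``resid_anscombe_scaled`` example:

        >>> np.allclose(res.aic, -2 * res.llf + 2 * (res.df_model + 1))
        True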
1654 """
1655 return -2 * self.llf + 2 * (self.df_model + 1)
    @cached_value
    def bic(self):
        """
        Bayes Information Criterion
        `deviance` - `df_resid` * log(`nobs`)
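
        Examples
        --------
        The definition above, checked directly on the fitted ``res``
        (``wnobs`` is the weighted number of observations):

        >>> nobs, k = res.model.wnobs, res.df_model + 1
        >>> np.allclose(res.bic, res.deviance - (nobs - k) * np.log(nobs))
        True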
1662 """
1663 return (self.deviance -
1664 (self.model.wnobs - self.df_model - 1) *
1665 np.log(self.model.wnobs))
    @Appender(pred.get_prediction_glm.__doc__)
    def get_prediction(self, exog=None, exposure=None, offset=None,
                       transform=True, linear=False,
                       row_labels=None):
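        # Hedged usage sketch: for a fitted GLM results instance `res`,
        #
        #     pred = res.get_prediction()
        #     frame = pred.summary_frame(alpha=0.05)
        #
        # tabulates predicted means with standard errors and confidence
        # bounds on the response scale; `linear=True` predicts on the scale
        # of the linear predictor instead.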
        import statsmodels.regression._prediction as linpred

        pred_kwds = {'exposure': exposure, 'offset': offset, 'linear': True}

        # two calls to get_prediction duplicate exog generation if patsy
        # formulas are used
        res_linpred = linpred.get_prediction(self, exog=exog,
                                             transform=transform,
                                             row_labels=row_labels,
                                             pred_kwds=pred_kwds)

        pred_kwds['linear'] = False
        res = pred.get_prediction_glm(self, exog=exog, transform=transform,
                                      row_labels=row_labels,
                                      linpred=res_linpred,
                                      link=self.model.family.link,
                                      pred_kwds=pred_kwds)

        return res
    def get_hat_matrix_diag(self, observed=True):
        """
        Compute the diagonal of the hat matrix

        Parameters
        ----------
        observed : bool
            If true, then the observed hessian is used in the hat matrix
            computation. If false, then the expected hessian is used.
            In the case of a canonical link function both are the same.

        Returns
        -------
        hat_matrix_diag : ndarray
            The diagonal of the hat matrix computed from the observed
            or expected hessian.
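
        Examples
        --------
        A hedged sanity check on the fitted ``res`` above: the leverages
        should sum to the number of estimated coefficients.

        >>> hd = res.get_hat_matrix_diag()
        >>> np.allclose(hd.sum(), res.model.exog.shape[1])
        True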
1707 """
1708 weights = self.model.hessian_factor(self.params, observed=observed)
1709 wexog = np.sqrt(weights)[:, None] * self.model.exog
1711 hd = (wexog * np.linalg.pinv(wexog).T).sum(1)
1712 return hd
    def get_influence(self, observed=True):
        """
        Get an instance of GLMInfluence with influence and outlier measures

        Parameters
        ----------
        observed : bool
            If true, then the observed hessian is used in the hat matrix
            computation. If false, then the expected hessian is used.
            In the case of a canonical link function both are the same.

        Returns
        -------
        infl : GLMInfluence instance
            The instance has methods to calculate the main influence and
            outlier measures as attributes.

        See Also
        --------
        statsmodels.stats.outliers_influence.GLMInfluence
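
        Examples
        --------
        A hedged usage sketch on the fitted ``res`` above; attribute names
        follow ``GLMInfluence``:

        >>> infl = res.get_influence(observed=True)
        >>> cooks_d, _ = infl.cooks_distance   # distances, approx. p-values
        >>> hat_diag = infl.hat_matrix_diag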
1734 """
1735 from statsmodels.stats.outliers_influence import GLMInfluence
1737 weights = self.model.hessian_factor(self.params, observed=observed)
1738 weights_sqrt = np.sqrt(weights)
1739 wexog = weights_sqrt[:, None] * self.model.exog
1740 wendog = weights_sqrt * self.model.endog
1742 # using get_hat_matrix_diag has duplicated computation
1743 hat_matrix_diag = self.get_hat_matrix_diag(observed=observed)
1744 infl = GLMInfluence(self, endog=wendog, exog=wexog,
1745 resid=self.resid_pearson,
1746 hat_matrix_diag=hat_matrix_diag)
1747 return infl
    @Appender(base.LikelihoodModelResults.remove_data.__doc__)
    def remove_data(self):
        # GLM has alias/reference in result instance
        self._data_attr.extend([i for i in self.model._data_attr
                                if '_data.' not in i])
        super(self.__class__, self).remove_data()

        # TODO: what are these in results?
        self._endog = None
        self._freq_weights = None
        self._var_weights = None
        self._iweights = None
        self._n_trials = None
    @Appender(_plot_added_variable_doc % {'extra_params_doc': ''})
    def plot_added_variable(self, focus_exog, resid_type=None,
                            use_glm_weights=True, fit_kwargs=None,
                            ax=None):

        from statsmodels.graphics.regressionplots import plot_added_variable

        fig = plot_added_variable(self, focus_exog,
                                  resid_type=resid_type,
                                  use_glm_weights=use_glm_weights,
                                  fit_kwargs=fit_kwargs, ax=ax)

        return fig
    @Appender(_plot_partial_residuals_doc % {'extra_params_doc': ''})
    def plot_partial_residuals(self, focus_exog, ax=None):

        from statsmodels.graphics.regressionplots import plot_partial_residuals

        return plot_partial_residuals(self, focus_exog, ax=ax)
    @Appender(_plot_ceres_residuals_doc % {'extra_params_doc': ''})
    def plot_ceres_residuals(self, focus_exog, frac=0.66, cond_means=None,
                             ax=None):

        from statsmodels.graphics.regressionplots import plot_ceres_residuals

        return plot_ceres_residuals(self, focus_exog, frac,
                                    cond_means=cond_means, ax=ax)
    def summary(self, yname=None, xname=None, title=None, alpha=.05):
        """
        Summarize the Regression Results

        Parameters
        ----------
        yname : str, optional
            Default is `y`
        xname : list[str], optional
            Names for the exogenous variables. Default is `var_##` for `##`
            running over the number of regressors; must match the number of
            parameters in the model.
        title : str, optional
            Title for the top table. If not None, then this replaces the
            default title
        alpha : float
            Significance level for the confidence intervals

        Returns
        -------
        smry : Summary instance
            This holds the summary tables and text, which can be printed or
            converted to various output formats.

        See Also
        --------
        statsmodels.iolib.summary.Summary : class to hold summary results
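
        Examples
        --------
        For any fitted GLM results instance, e.g. the ``res`` above:

        >>> print(res.summary())  # doctest: +SKIP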
1820 """
1822 top_left = [('Dep. Variable:', None),
1823 ('Model:', None),
1824 ('Model Family:', [self.family.__class__.__name__]),
1825 ('Link Function:', [self.family.link.__class__.__name__]),
1826 ('Method:', [self.method]),
1827 ('Date:', None),
1828 ('Time:', None),
1829 ('No. Iterations:',
1830 ["%d" % self.fit_history['iteration']]),
1831 ]
1833 top_right = [('No. Observations:', None),
1834 ('Df Residuals:', None),
1835 ('Df Model:', None),
1836 ('Scale:', ["%#8.5g" % self.scale]),
1837 ('Log-Likelihood:', None),
1838 ('Deviance:', ["%#8.5g" % self.deviance]),
1839 ('Pearson chi2:', ["%#6.3g" % self.pearson_chi2])
1840 ]
1842 if hasattr(self, 'cov_type'):
1843 top_left.append(('Covariance Type:', [self.cov_type]))
1845 if title is None:
1846 title = "Generalized Linear Model Regression Results"
1848 # create summary tables
1849 from statsmodels.iolib.summary import Summary
1850 smry = Summary()
1851 smry.add_table_2cols(self, gleft=top_left, gright=top_right,
1852 yname=yname, xname=xname, title=title)
1853 smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
1854 use_t=self.use_t)
1856 if hasattr(self, 'constraints'):
1857 smry.add_extra_txt(['Model has been estimated subject to linear '
1858 'equality constraints.'])
1859 return smry
    def summary2(self, yname=None, xname=None, title=None, alpha=.05,
                 float_format="%.4f"):
        """Experimental summary for regression results

        Parameters
        ----------
        yname : str
            Name of the dependent variable (optional)
        xname : list[str], optional
            Names for the exogenous variables. Default is `var_##` for `##`
            running over the number of regressors; must match the number of
            parameters in the model.
        title : str, optional
            Title for the top table. If not None, then this replaces the
            default title
        alpha : float
            Significance level for the confidence intervals
        float_format : str
            Print format for floats in the parameters summary

        Returns
        -------
        smry : Summary instance
            This holds the summary tables and text, which can be printed or
            converted to various output formats.

        See Also
        --------
        statsmodels.iolib.summary2.Summary : class to hold summary results
        """
        self.method = 'IRLS'
        from statsmodels.iolib import summary2
        smry = summary2.Summary()
        smry.add_base(results=self, alpha=alpha, float_format=float_format,
                      xname=xname, yname=yname, title=title)
        if hasattr(self, 'constraints'):
            smry.add_text('Model has been estimated subject to linear '
                          'equality constraints.')

        return smry
class GLMResultsWrapper(lm.RegressionResultsWrapper):
    _attrs = {
        'resid_anscombe': 'rows',
        'resid_deviance': 'rows',
        'resid_pearson': 'rows',
        'resid_response': 'rows',
        'resid_working': 'rows'
    }
    _wrap_attrs = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_attrs,
                                   _attrs)


wrap.populate_wrapper(GLMResultsWrapper, GLMResults)
if __name__ == "__main__":
    import statsmodels.api as sm
    data = sm.datasets.longley.load(as_pandas=False)
    # data.exog = add_constant(data.exog)
    GLMmod = GLM(data.endog, data.exog).fit()
    # `summary` takes no `returns` argument; it returns a Summary instance
    GLMT = GLMmod.summary()
    # GLMT[0].extend_right(GLMT[1])
    # print(GLMT[0])
    # print(GLMT[2])
    GLMTp = GLMmod.summary(title='Test GLM')
1927 """
1928From Stata
1929. webuse beetle
1930. glm r i.beetle ldose, family(binomial n) link(cloglog)
1932Iteration 0: log likelihood = -79.012269
1933Iteration 1: log likelihood = -76.94951
1934Iteration 2: log likelihood = -76.945645
1935Iteration 3: log likelihood = -76.945645
1937Generalized linear models No. of obs = 24
1938Optimization : ML Residual df = 20
1939 Scale parameter = 1
1940Deviance = 73.76505595 (1/df) Deviance = 3.688253
1941Pearson = 71.8901173 (1/df) Pearson = 3.594506
1943Variance function: V(u) = u*(1-u/n) [Binomial]
1944Link function : g(u) = ln(-ln(1-u/n)) [Complementary log-log]
1946 AIC = 6.74547
1947Log likelihood = -76.94564525 BIC = 10.20398
1949------------------------------------------------------------------------------
1950 | OIM
1951 r | Coef. Std. Err. z P>|z| [95% Conf. Interval]
1952-------------+----------------------------------------------------------------
1953 beetle |
1954 2 | -.0910396 .1076132 -0.85 0.398 -.3019576 .1198783
1955 3 | -1.836058 .1307125 -14.05 0.000 -2.09225 -1.579867
1956 |
1957 ldose | 19.41558 .9954265 19.50 0.000 17.46458 21.36658
1958 _cons | -34.84602 1.79333 -19.43 0.000 -38.36089 -31.33116
1959------------------------------------------------------------------------------
1960"""