"""
Limited dependent variable and qualitative variables.

Includes binary outcomes, count data, (ordered) ordinal data and limited
dependent variables.

General References
------------------

A.C. Cameron and P.K. Trivedi. `Regression Analysis of Count Data`.
    Cambridge, 1998

G.S. Maddala. `Limited-Dependent and Qualitative Variables in Econometrics`.
    Cambridge, 1983.

W. Greene. `Econometric Analysis`. Prentice Hall, 5th. edition. 2003.
"""
__all__ = ["Poisson", "Logit", "Probit", "MNLogit", "NegativeBinomial",
           "GeneralizedPoisson", "NegativeBinomialP", "CountModel"]
import numpy as np
from pandas import get_dummies, MultiIndex

from scipy.special import gammaln, digamma, polygamma, loggamma
from scipy import stats, special
from scipy.stats import nbinom

from statsmodels.compat.pandas import Appender

import statsmodels.tools.tools as tools
from statsmodels.tools import data as data_tools
from statsmodels.tools.decorators import cache_readonly
from statsmodels.tools.sm_exceptions import (PerfectSeparationError,
                                             SpecificationWarning)
from statsmodels.tools.numdiff import approx_fprime_cs
import statsmodels.base.model as base
from statsmodels.base.data import handle_data  # for mnlogit
import statsmodels.regression.linear_model as lm
import statsmodels.base.wrapper as wrap

from statsmodels.base.l1_slsqp import fit_l1_slsqp
from statsmodels.distributions import genpoisson_p

try:
    import cvxopt  # noqa:F401
    have_cvxopt = True
except ImportError:
    have_cvxopt = False

import warnings

# TODO: When we eventually get user-settable precision, we need to change this
FLOAT_EPS = np.finfo(float).eps

# TODO: add options for the parameter covariance/variance
#       i.e., OIM, EIM, and BHHH; see Greene 21.4
59_discrete_models_docs = """
60"""
62_discrete_results_docs = """
63 %(one_line_description)s
65 Parameters
66 ----------
67 model : A DiscreteModel instance
68 params : array_like
69 The parameters of a fitted model.
70 hessian : array_like
71 The hessian of the fitted model.
72 scale : float
73 A scale parameter for the covariance matrix.
75 Attributes
76 ----------
77 df_resid : float
78 See model definition.
79 df_model : float
80 See model definition.
81 llf : float
82 Value of the loglikelihood
83 %(extra_attr)s"""
85_l1_results_attr = """ nnz_params : int
86 The number of nonzero parameters in the model. Train with
87 trim_params == True or else numerical error will distort this.
88 trimmed : bool array
89 trimmed[i] == True if the ith parameter was trimmed from the model."""
_get_start_params_null_docs = """
Compute one-step moment estimator for null (constant-only) model

This is a preliminary estimator used as start_params.

Returns
-------
params : ndarray
    parameter estimate based on one-step moment matching

"""
103# helper for MNLogit (will be generally useful later)
105def _numpy_to_dummies(endog):
106 if endog.dtype.kind in ['S', 'O']:
107 endog_dummies, ynames = tools.categorical(endog, drop=True,
108 dictnames=True)
109 elif endog.ndim == 2:
110 endog_dummies = endog
111 ynames = range(endog.shape[1])
112 else:
113 endog_dummies, ynames = tools.categorical(endog, drop=True,
114 dictnames=True)
115 return endog_dummies, ynames
def _pandas_to_dummies(endog):
    if endog.ndim == 2:
        if endog.shape[1] == 1:
            yname = endog.columns[0]
            endog_dummies = get_dummies(endog.iloc[:, 0])
        else:  # 2d DataFrame with several columns, assumed already dummy-coded
            yname = 'y'
            endog_dummies = endog
    else:  # Series
        yname = endog.name
        endog_dummies = get_dummies(endog)
    ynames = endog_dummies.columns.tolist()

    return endog_dummies, ynames, yname
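# Illustrative sketch (not part of the upstream module) of what the helpers
# above produce: a categorical endog is expanded into one indicator column per
# level, which MNLogit then stores as wendog.
#
#   >>> import pandas as pd
#   >>> y = pd.Series(['a', 'b', 'a', 'c'], name='choice')
#   >>> dummies, ynames, yname = _pandas_to_dummies(y)
#   >>> list(ynames), yname
#   (['a', 'b', 'c'], 'choice')
#   >>> dummies.shape
#   (4, 3)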
134def _validate_l1_method(method):
135 """
136 As of 0.10.0, the supported values for `method` in `fit_regularized`
137 are "l1" and "l1_cvxopt_cp". If an invalid value is passed, raise
138 with a helpful error message
140 Parameters
141 ----------
142 method : str
144 Raises
145 ------
146 ValueError
147 """
148 if method not in ['l1', 'l1_cvxopt_cp']:
149 raise ValueError('`method` = {method} is not supported, use either '
150 '"l1" or "l1_cvxopt_cp"'.format(method=method))
153#### Private Model Classes ####
156class DiscreteModel(base.LikelihoodModel):
157 """
158 Abstract class for discrete choice models.
160 This class does not do anything itself but lays out the methods and
161 call signature expected of child classes in addition to those of
162 statsmodels.model.LikelihoodModel.
163 """
164 def __init__(self, endog, exog, **kwargs):
165 super(DiscreteModel, self).__init__(endog, exog, **kwargs)
166 self.raise_on_perfect_prediction = True
168 def initialize(self):
169 """
170 Initialize is called by
171 statsmodels.model.LikelihoodModel.__init__
172 and should contain any preprocessing that needs to be done for a model.
173 """
174 # assumes constant
175 rank = np.linalg.matrix_rank(self.exog)
176 self.df_model = float(rank - 1)
177 self.df_resid = float(self.exog.shape[0] - rank)
179 def cdf(self, X):
180 """
181 The cumulative distribution function of the model.
182 """
183 raise NotImplementedError
185 def pdf(self, X):
186 """
187 The probability density (mass) function of the model.
188 """
189 raise NotImplementedError
191 def _check_perfect_pred(self, params, *args):
192 endog = self.endog
193 fittedvalues = self.cdf(np.dot(self.exog, params[:self.exog.shape[1]]))
194 if (self.raise_on_perfect_prediction and
195 np.allclose(fittedvalues - endog, 0)):
196 msg = "Perfect separation detected, results not available"
197 raise PerfectSeparationError(msg)
199 @Appender(base.LikelihoodModel.fit.__doc__)
200 def fit(self, start_params=None, method='newton', maxiter=35,
201 full_output=1, disp=1, callback=None, **kwargs):
202 """
203 Fit the model using maximum likelihood.
205 The rest of the docstring is from
206 statsmodels.base.model.LikelihoodModel.fit
207 """
208 if callback is None:
209 callback = self._check_perfect_pred
210 else:
211 pass # TODO: make a function factory to have multiple call-backs
213 mlefit = super(DiscreteModel, self).fit(
214 start_params=start_params,
215 method=method, maxiter=maxiter, full_output=full_output,
216 disp=disp, callback=callback, **kwargs)
218 return mlefit # It is up to subclasses to wrap results
220 def fit_regularized(self, start_params=None, method='l1',
221 maxiter='defined_by_method', full_output=1, disp=True,
222 callback=None, alpha=0, trim_mode='auto',
223 auto_trim_tol=0.01, size_trim_tol=1e-4, qc_tol=0.03,
224 qc_verbose=False, **kwargs):
225 """
226 Fit the model using a regularized maximum likelihood.
228 The regularization method AND the solver used is determined by the
229 argument method.
231 Parameters
232 ----------
233 start_params : array_like, optional
234 Initial guess of the solution for the loglikelihood maximization.
235 The default is an array of zeros.
236 method : 'l1' or 'l1_cvxopt_cp'
237 See notes for details.
238 maxiter : {int, 'defined_by_method'}
239 Maximum number of iterations to perform.
240 If 'defined_by_method', then use method defaults (see notes).
241 full_output : bool
242 Set to True to have all available output in the Results object's
243 mle_retvals attribute. The output is dependent on the solver.
244 See LikelihoodModelResults notes section for more information.
245 disp : bool
246 Set to True to print convergence messages.
247 fargs : tuple
248 Extra arguments passed to the likelihood function, i.e.,
249 loglike(x,*args).
250 callback : callable callback(xk)
251 Called after each iteration, as callback(xk), where xk is the
252 current parameter vector.
253 retall : bool
254 Set to True to return list of solutions at each iteration.
255 Available in Results object's mle_retvals attribute.
256 alpha : non-negative scalar or numpy array (same size as parameters)
257 The weight multiplying the l1 penalty term.
        trim_mode : 'auto', 'size', or 'off'
            If not 'off', trim (set to zero) parameters that would have been
            zero if the solver reached the theoretical minimum.
            If 'auto', trim params using the Theory above.
            If 'size', trim params if they have very small absolute value.
263 size_trim_tol : float or 'auto' (default = 'auto')
264 Tolerance used when trim_mode == 'size'.
265 auto_trim_tol : float
266 Tolerance used when trim_mode == 'auto'.
267 qc_tol : float
268 Print warning and do not allow auto trim when (ii) (above) is
269 violated by this much.
270 qc_verbose : bool
271 If true, print out a full QC report upon failure.
272 **kwargs
273 Additional keyword arguments used when fitting the model.
275 Returns
276 -------
277 Results
278 A results instance.
280 Notes
281 -----
282 Using 'l1_cvxopt_cp' requires the cvxopt module.
284 Extra parameters are not penalized if alpha is given as a scalar.
285 An example is the shape parameter in NegativeBinomial `nb1` and `nb2`.
287 Optional arguments for the solvers (available in Results.mle_settings)::
289 'l1'
290 acc : float (default 1e-6)
291 Requested accuracy as used by slsqp
292 'l1_cvxopt_cp'
293 abstol : float
294 absolute accuracy (default: 1e-7).
295 reltol : float
296 relative accuracy (default: 1e-6).
297 feastol : float
298 tolerance for feasibility conditions (default: 1e-7).
299 refinement : int
300 number of iterative refinement steps when solving KKT
301 equations (default: 1).
303 Optimization methodology
305 With :math:`L` the negative log likelihood, we solve the convex but
306 non-smooth problem
308 .. math:: \\min_\\beta L(\\beta) + \\sum_k\\alpha_k |\\beta_k|
310 via the transformation to the smooth, convex, constrained problem
311 in twice as many variables (adding the "added variables" :math:`u_k`)
313 .. math:: \\min_{\\beta,u} L(\\beta) + \\sum_k\\alpha_k u_k,
315 subject to
317 .. math:: -u_k \\leq \\beta_k \\leq u_k.
319 With :math:`\\partial_k L` the derivative of :math:`L` in the
320 :math:`k^{th}` parameter direction, theory dictates that, at the
321 minimum, exactly one of two conditions holds:
323 (i) :math:`|\\partial_k L| = \\alpha_k` and :math:`\\beta_k \\neq 0`
324 (ii) :math:`|\\partial_k L| \\leq \\alpha_k` and :math:`\\beta_k = 0`
325 """
326 _validate_l1_method(method)
327 # Set attributes based on method
328 cov_params_func = self.cov_params_func_l1
330 ### Bundle up extra kwargs for the dictionary kwargs. These are
331 ### passed through super(...).fit() as kwargs and unpacked at
332 ### appropriate times
333 alpha = np.array(alpha)
334 assert alpha.min() >= 0
335 try:
336 kwargs['alpha'] = alpha
337 except TypeError:
338 kwargs = dict(alpha=alpha)
339 kwargs['alpha_rescaled'] = kwargs['alpha'] / float(self.endog.shape[0])
340 kwargs['trim_mode'] = trim_mode
341 kwargs['size_trim_tol'] = size_trim_tol
342 kwargs['auto_trim_tol'] = auto_trim_tol
343 kwargs['qc_tol'] = qc_tol
344 kwargs['qc_verbose'] = qc_verbose
346 ### Define default keyword arguments to be passed to super(...).fit()
347 if maxiter == 'defined_by_method':
348 if method == 'l1':
349 maxiter = 1000
350 elif method == 'l1_cvxopt_cp':
351 maxiter = 70
353 ## Parameters to pass to super(...).fit()
354 # For the 'extra' parameters, pass all that are available,
355 # even if we know (at this point) we will only use one.
356 extra_fit_funcs = {'l1': fit_l1_slsqp}
357 if have_cvxopt and method == 'l1_cvxopt_cp':
358 from statsmodels.base.l1_cvxopt import fit_l1_cvxopt_cp
359 extra_fit_funcs['l1_cvxopt_cp'] = fit_l1_cvxopt_cp
360 elif method.lower() == 'l1_cvxopt_cp':
361 raise ValueError("Cannot use l1_cvxopt_cp as cvxopt "
362 "was not found (install it, or use method='l1' instead)")
364 if callback is None:
365 callback = self._check_perfect_pred
366 else:
367 pass # make a function factory to have multiple call-backs
369 mlefit = super(DiscreteModel, self).fit(start_params=start_params,
370 method=method, maxiter=maxiter, full_output=full_output,
371 disp=disp, callback=callback, extra_fit_funcs=extra_fit_funcs,
372 cov_params_func=cov_params_func, **kwargs)
374 return mlefit # up to subclasses to wrap results
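    # Usage sketch for the L1 machinery above (illustrative only, not part of
    # the upstream module); the spector dataset and the alpha value are just an
    # example.  A scalar alpha penalizes every coefficient equally:
    #
    #   >>> import statsmodels.api as sm
    #   >>> spector = sm.datasets.spector.load_pandas()
    #   >>> X = sm.add_constant(spector.exog)
    #   >>> res = sm.Logit(spector.endog, X).fit_regularized(method='l1',
    #   ...                                                  alpha=0.5, disp=0)
    #   >>> res.params  # some coefficients may be trimmed to exactly zero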
376 def cov_params_func_l1(self, likelihood_model, xopt, retvals):
377 """
378 Computes cov_params on a reduced parameter space
379 corresponding to the nonzero parameters resulting from the
380 l1 regularized fit.
382 Returns a full cov_params matrix, with entries corresponding
383 to zero'd values set to np.nan.
384 """
385 H = likelihood_model.hessian(xopt)
386 trimmed = retvals['trimmed']
387 nz_idx = np.nonzero(~trimmed)[0]
388 nnz_params = (~trimmed).sum()
389 if nnz_params > 0:
390 H_restricted = H[nz_idx[:, None], nz_idx]
391 # Covariance estimate for the nonzero params
392 H_restricted_inv = np.linalg.inv(-H_restricted)
393 else:
394 H_restricted_inv = np.zeros(0)
396 cov_params = np.nan * np.ones(H.shape)
397 cov_params[nz_idx[:, None], nz_idx] = H_restricted_inv
399 return cov_params
401 def predict(self, params, exog=None, linear=False):
402 """
403 Predict response variable of a model given exogenous variables.
404 """
405 raise NotImplementedError
407 def _derivative_exog(self, params, exog=None, dummy_idx=None,
408 count_idx=None):
409 """
410 This should implement the derivative of the non-linear function
411 """
412 raise NotImplementedError
414 def _derivative_exog_helper(self, margeff, params, exog, dummy_idx,
415 count_idx, transform):
416 """
417 Helper for _derivative_exog to wrap results appropriately
418 """
419 from .discrete_margins import _get_count_effects, _get_dummy_effects
421 if count_idx is not None:
422 margeff = _get_count_effects(margeff, exog, count_idx, transform,
423 self, params)
424 if dummy_idx is not None:
425 margeff = _get_dummy_effects(margeff, exog, dummy_idx, transform,
426 self, params)
428 return margeff
431class BinaryModel(DiscreteModel):
433 def __init__(self, endog, exog, **kwargs):
434 super(BinaryModel, self).__init__(endog, exog, **kwargs)
435 if (not issubclass(self.__class__, MultinomialModel) and
436 not np.all((self.endog >= 0) & (self.endog <= 1))):
437 raise ValueError("endog must be in the unit interval.")
440 def predict(self, params, exog=None, linear=False):
441 """
442 Predict response variable of a model given exogenous variables.
444 Parameters
445 ----------
446 params : array_like
447 Fitted parameters of the model.
448 exog : array_like
449 1d or 2d array of exogenous values. If not supplied, the
450 whole exog attribute of the model is used.
451 linear : bool, optional
452 If True, returns the linear predictor dot(exog,params). Else,
453 returns the value of the cdf at the linear predictor.
455 Returns
456 -------
457 array
458 Fitted values at exog.
459 """
460 if exog is None:
461 exog = self.exog
462 if not linear:
463 return self.cdf(np.dot(exog, params))
464 else:
465 return np.dot(exog, params)
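    # Illustrative sketch (not from the upstream source): predict() returns
    # probabilities by default and the linear index x'beta when linear=True.
    # `model`, `params`, and `x_new` here are assumptions for the example.
    #
    #   >>> import numpy as np
    #   >>> params = np.array([-1.0, 2.0])
    #   >>> x_new = np.array([[1.0, 0.3], [1.0, 0.9]])
    #   >>> model.predict(params, x_new)                # cdf(x'beta)
    #   >>> model.predict(params, x_new, linear=True)   # x'beta itself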
467 @Appender(DiscreteModel.fit_regularized.__doc__)
468 def fit_regularized(self, start_params=None, method='l1',
469 maxiter='defined_by_method', full_output=1, disp=1, callback=None,
470 alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
471 qc_tol=0.03, **kwargs):
473 _validate_l1_method(method)
475 bnryfit = super(BinaryModel, self).fit_regularized(
476 start_params=start_params, method=method, maxiter=maxiter,
477 full_output=full_output, disp=disp, callback=callback,
478 alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
479 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)
481 discretefit = L1BinaryResults(self, bnryfit)
482 return L1BinaryResultsWrapper(discretefit)
484 def _derivative_predict(self, params, exog=None, transform='dydx'):
485 """
486 For computing marginal effects standard errors.
488 This is used only in the case of discrete and count regressors to
489 get the variance-covariance of the marginal effects. It returns
490 [d F / d params] where F is the predict.
492 Transform can be 'dydx' or 'eydx'. Checking is done in margeff
493 computations for appropriate transform.
494 """
495 if exog is None:
496 exog = self.exog
497 dF = self.pdf(np.dot(exog, params))[:,None] * exog
498 if 'ey' in transform:
499 dF /= self.predict(params, exog)[:,None]
500 return dF
502 def _derivative_exog(self, params, exog=None, transform='dydx',
503 dummy_idx=None, count_idx=None):
504 """
505 For computing marginal effects returns dF(XB) / dX where F(.) is
506 the predicted probabilities
508 transform can be 'dydx', 'dyex', 'eydx', or 'eyex'.
510 Not all of these make sense in the presence of discrete regressors,
511 but checks are done in the results in get_margeff.
512 """
513 # Note: this form should be appropriate for
514 # group 1 probit, logit, logistic, cloglog, heckprob, xtprobit
515 if exog is None:
516 exog = self.exog
518 margeff = np.dot(self.pdf(np.dot(exog, params))[:, None],
519 params[None, :])
521 if 'ex' in transform:
522 margeff *= exog
523 if 'ey' in transform:
524 margeff /= self.predict(params, exog)[:, None]
526 return self._derivative_exog_helper(margeff, params, exog,
527 dummy_idx, count_idx, transform)
530class MultinomialModel(BinaryModel):
532 def _handle_data(self, endog, exog, missing, hasconst, **kwargs):
533 if data_tools._is_using_ndarray_type(endog, None):
534 endog_dummies, ynames = _numpy_to_dummies(endog)
535 yname = 'y'
536 elif data_tools._is_using_pandas(endog, None):
537 endog_dummies, ynames, yname = _pandas_to_dummies(endog)
538 else:
539 endog = np.asarray(endog)
540 endog_dummies, ynames = _numpy_to_dummies(endog)
541 yname = 'y'
543 if not isinstance(ynames, dict):
544 ynames = dict(zip(range(endog_dummies.shape[1]), ynames))
546 self._ynames_map = ynames
547 data = handle_data(endog_dummies, exog, missing, hasconst, **kwargs)
548 data.ynames = yname # overwrite this to single endog name
549 data.orig_endog = endog
550 self.wendog = data.endog
552 # repeating from upstream...
553 for key in kwargs:
554 if key in ['design_info', 'formula']: # leave attached to data
555 continue
556 try:
557 setattr(self, key, data.__dict__.pop(key))
558 except KeyError:
559 pass
560 return data
562 def initialize(self):
563 """
564 Preprocesses the data for MNLogit.
565 """
566 super(MultinomialModel, self).initialize()
567 # This is also a "whiten" method in other models (eg regression)
568 self.endog = self.endog.argmax(1) # turn it into an array of col idx
569 self.J = self.wendog.shape[1]
570 self.K = self.exog.shape[1]
571 self.df_model *= (self.J-1) # for each J - 1 equation.
572 self.df_resid = self.exog.shape[0] - self.df_model - (self.J-1)
574 def predict(self, params, exog=None, linear=False):
575 """
576 Predict response variable of a model given exogenous variables.
578 Parameters
579 ----------
580 params : array_like
581 2d array of fitted parameters of the model. Should be in the
582 order returned from the model.
        exog : array_like
            1d or 2d array of exogenous values. If not supplied, the
            whole exog attribute of the model is used. If a 1d array is given
            it is assumed to be 1 row of exogenous variables. If you only have
            one regressor and would like to do prediction, you must provide
            a 2d array with shape[1] == 1.
589 linear : bool, optional
590 If True, returns the linear predictor dot(exog,params). Else,
591 returns the value of the cdf at the linear predictor.
593 Notes
594 -----
595 Column 0 is the base case, the rest conform to the rows of params
596 shifted up one for the base case.
597 """
598 if exog is None: # do here to accommodate user-given exog
599 exog = self.exog
600 if exog.ndim == 1:
601 exog = exog[None]
602 pred = super(MultinomialModel, self).predict(params, exog, linear)
603 if linear:
604 pred = np.column_stack((np.zeros(len(exog)), pred))
605 return pred
607 @Appender(DiscreteModel.fit.__doc__)
608 def fit(self, start_params=None, method='newton', maxiter=35,
609 full_output=1, disp=1, callback=None, **kwargs):
610 if start_params is None:
611 start_params = np.zeros((self.K * (self.J-1)))
612 else:
613 start_params = np.asarray(start_params)
614 callback = lambda x : None # placeholder until check_perfect_pred
615 # skip calling super to handle results from LikelihoodModel
616 mnfit = base.LikelihoodModel.fit(self, start_params = start_params,
617 method=method, maxiter=maxiter, full_output=full_output,
618 disp=disp, callback=callback, **kwargs)
619 mnfit.params = mnfit.params.reshape(self.K, -1, order='F')
620 mnfit = MultinomialResults(self, mnfit)
621 return MultinomialResultsWrapper(mnfit)
623 @Appender(DiscreteModel.fit_regularized.__doc__)
624 def fit_regularized(self, start_params=None, method='l1',
625 maxiter='defined_by_method', full_output=1, disp=1, callback=None,
626 alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
627 qc_tol=0.03, **kwargs):
628 if start_params is None:
629 start_params = np.zeros((self.K * (self.J-1)))
630 else:
631 start_params = np.asarray(start_params)
632 mnfit = DiscreteModel.fit_regularized(
633 self, start_params=start_params, method=method, maxiter=maxiter,
634 full_output=full_output, disp=disp, callback=callback,
635 alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
636 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)
637 mnfit.params = mnfit.params.reshape(self.K, -1, order='F')
638 mnfit = L1MultinomialResults(self, mnfit)
639 return L1MultinomialResultsWrapper(mnfit)
641 def _derivative_predict(self, params, exog=None, transform='dydx'):
642 """
643 For computing marginal effects standard errors.
645 This is used only in the case of discrete and count regressors to
646 get the variance-covariance of the marginal effects. It returns
647 [d F / d params] where F is the predicted probabilities for each
648 choice. dFdparams is of shape nobs x (J*K) x (J-1)*K.
649 The zero derivatives for the base category are not included.
651 Transform can be 'dydx' or 'eydx'. Checking is done in margeff
652 computations for appropriate transform.
653 """
654 if exog is None:
655 exog = self.exog
        if params.ndim == 1:  # will get flattened from approx_fprime
            params = params.reshape(self.K, self.J-1, order='F')
659 eXB = np.exp(np.dot(exog, params))
660 sum_eXB = (1 + eXB.sum(1))[:,None]
661 J = int(self.J)
662 K = int(self.K)
663 repeat_eXB = np.repeat(eXB, J, axis=1)
664 X = np.tile(exog, J-1)
665 # this is the derivative wrt the base level
666 F0 = -repeat_eXB * X / sum_eXB ** 2
667 # this is the derivative wrt the other levels when
668 # dF_j / dParams_j (ie., own equation)
669 #NOTE: this computes too much, any easy way to cut down?
670 F1 = eXB.T[:,:,None]*X * (sum_eXB - repeat_eXB) / (sum_eXB**2)
671 F1 = F1.transpose((1,0,2)) # put the nobs index first
673 # other equation index
674 other_idx = ~np.kron(np.eye(J-1), np.ones(K)).astype(bool)
675 F1[:, other_idx] = (-eXB.T[:,:,None]*X*repeat_eXB / \
676 (sum_eXB**2)).transpose((1,0,2))[:, other_idx]
677 dFdX = np.concatenate((F0[:, None,:], F1), axis=1)
679 if 'ey' in transform:
680 dFdX /= self.predict(params, exog)[:, :, None]
681 return dFdX
683 def _derivative_exog(self, params, exog=None, transform='dydx',
684 dummy_idx=None, count_idx=None):
685 """
686 For computing marginal effects returns dF(XB) / dX where F(.) is
687 the predicted probabilities
689 transform can be 'dydx', 'dyex', 'eydx', or 'eyex'.
691 Not all of these make sense in the presence of discrete regressors,
692 but checks are done in the results in get_margeff.
694 For Multinomial models the marginal effects are
696 P[j] * (params[j] - sum_k P[k]*params[k])
        It is returned unshaped, so that each row contains each of the J
        equations. This makes it easier to take derivatives of this for
        standard errors. If you want average marginal effects you can do
        margeff.reshape(nobs, K, J, order='F').mean(0) and the marginal
        effects for choice J are in column J.
        """
704 J = int(self.J) # number of alternative choices
705 K = int(self.K) # number of variables
706 # Note: this form should be appropriate for
707 # group 1 probit, logit, logistic, cloglog, heckprob, xtprobit
708 if exog is None:
709 exog = self.exog
        if params.ndim == 1:  # will get flattened from approx_fprime
            params = params.reshape(K, J-1, order='F')
713 zeroparams = np.c_[np.zeros(K), params] # add base in
715 cdf = self.cdf(np.dot(exog, params))
717 # TODO: meaningful interpretation for `iterm`?
718 iterm = np.array([cdf[:, [i]] * zeroparams[:, i]
719 for i in range(int(J))]).sum(0)
721 margeff = np.array([cdf[:, [j]] * (zeroparams[:, j] - iterm)
722 for j in range(J)])
724 # swap the axes to make sure margeff are in order nobs, K, J
725 margeff = np.transpose(margeff, (1, 2, 0))
727 if 'ex' in transform:
728 margeff *= exog
729 if 'ey' in transform:
730 margeff /= self.predict(params, exog)[:,None,:]
732 margeff = self._derivative_exog_helper(margeff, params, exog,
733 dummy_idx, count_idx, transform)
734 return margeff.reshape(len(exog), -1, order='F')
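# Illustrative sketch (not part of the upstream module): the multinomial
# marginal effects above are normally reached through get_margeff on a fitted
# MNLogit; the anes96 dataset and column choice are just an example.
#
#   >>> import statsmodels.api as sm
#   >>> anes = sm.datasets.anes96.load_pandas().data
#   >>> X = sm.add_constant(anes[['age', 'educ']])
#   >>> res = sm.MNLogit(anes['PID'], X).fit(disp=0)
#   >>> res.get_margeff().summary()  # average marginal effects, one block per choice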
737class CountModel(DiscreteModel):
738 def __init__(self, endog, exog, offset=None, exposure=None, missing='none',
739 **kwargs):
740 super(CountModel, self).__init__(endog, exog, missing=missing,
741 offset=offset,
742 exposure=exposure, **kwargs)
743 if exposure is not None:
744 self.exposure = np.log(self.exposure)
745 self._check_inputs(self.offset, self.exposure, self.endog)
746 if offset is None:
747 delattr(self, 'offset')
748 if exposure is None:
749 delattr(self, 'exposure')
751 # promote dtype to float64 if needed
752 dt = np.promote_types(self.endog.dtype, np.float64)
753 self.endog = np.asarray(self.endog, dt)
754 dt = np.promote_types(self.exog.dtype, np.float64)
755 self.exog = np.asarray(self.exog, dt)
758 def _check_inputs(self, offset, exposure, endog):
759 if offset is not None and offset.shape[0] != endog.shape[0]:
760 raise ValueError("offset is not the same length as endog")
762 if exposure is not None and exposure.shape[0] != endog.shape[0]:
763 raise ValueError("exposure is not the same length as endog")
765 def _get_init_kwds(self):
766 # this is a temporary fixup because exposure has been transformed
767 # see #1609
768 kwds = super(CountModel, self)._get_init_kwds()
769 if 'exposure' in kwds and kwds['exposure'] is not None:
770 kwds['exposure'] = np.exp(kwds['exposure'])
771 return kwds
773 def predict(self, params, exog=None, exposure=None, offset=None,
774 linear=False):
775 """
776 Predict response variable of a count model given exogenous variables
778 Parameters
779 ----------
780 params : array_like
781 Model parameters
        exog : array_like, optional
            Design / exogenous data. If exog is None, model exog is used.
784 exposure : array_like, optional
785 Log(exposure) is added to the linear prediction with
786 coefficient equal to 1. If exposure is not provided and exog
787 is None, uses the model's exposure if present. If not, uses
788 0 as the default value.
789 offset : array_like, optional
790 Offset is added to the linear prediction with coefficient
791 equal to 1. If offset is not provided and exog
792 is None, uses the model's offset if present. If not, uses
793 0 as the default value.
794 linear : bool
795 If True, returns the linear predicted values. If False,
796 returns the exponential of the linear predicted value.
798 Notes
799 -----
800 If exposure is specified, then it will be logged by the method.
801 The user does not need to log it first.
802 """
803 # the following is copied from GLM predict (without family/link check)
804 # Use fit offset if appropriate
805 if offset is None and exog is None and hasattr(self, 'offset'):
806 offset = self.offset
807 elif offset is None:
808 offset = 0.
810 # Use fit exposure if appropriate
811 if exposure is None and exog is None and hasattr(self, 'exposure'):
812 # Already logged
813 exposure = self.exposure
814 elif exposure is None:
815 exposure = 0.
816 else:
817 exposure = np.log(exposure)
819 if exog is None:
820 exog = self.exog
822 fitted = np.dot(exog, params[:exog.shape[1]])
823 linpred = fitted + exposure + offset
824 if not linear:
825 return np.exp(linpred) # not cdf
826 else:
827 return linpred
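    # Illustrative sketch (not from the upstream source) of the offset/exposure
    # handling above: exposure enters on the log scale with coefficient 1, so
    # doubling exposure doubles the predicted mean.  `res` (a fitted count-model
    # results instance) and `x_new` are assumptions for the example.
    #
    #   >>> import numpy as np
    #   >>> x_new = np.array([[1.0, 0.5]])
    #   >>> res.model.predict(res.params, x_new, exposure=np.array([1.0]))
    #   >>> res.model.predict(res.params, x_new, exposure=np.array([2.0]))  # twice the above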
829 def _derivative_predict(self, params, exog=None, transform='dydx'):
830 """
831 For computing marginal effects standard errors.
833 This is used only in the case of discrete and count regressors to
834 get the variance-covariance of the marginal effects. It returns
835 [d F / d params] where F is the predict.
837 Transform can be 'dydx' or 'eydx'. Checking is done in margeff
838 computations for appropriate transform.
839 """
840 if exog is None:
841 exog = self.exog
842 #NOTE: this handles offset and exposure
843 dF = self.predict(params, exog)[:,None] * exog
844 if 'ey' in transform:
845 dF /= self.predict(params, exog)[:,None]
846 return dF
848 def _derivative_exog(self, params, exog=None, transform="dydx",
849 dummy_idx=None, count_idx=None):
850 """
851 For computing marginal effects. These are the marginal effects
852 d F(XB) / dX
853 For the Poisson model F(XB) is the predicted counts rather than
854 the probabilities.
856 transform can be 'dydx', 'dyex', 'eydx', or 'eyex'.
858 Not all of these make sense in the presence of discrete regressors,
859 but checks are done in the results in get_margeff.
860 """
861 # group 3 poisson, nbreg, zip, zinb
862 if exog is None:
863 exog = self.exog
864 k_extra = getattr(self, 'k_extra', 0)
865 params_exog = params if k_extra == 0 else params[:-k_extra]
866 margeff = self.predict(params, exog)[:,None] * params_exog[None,:]
867 if 'ex' in transform:
868 margeff *= exog
869 if 'ey' in transform:
870 margeff /= self.predict(params, exog)[:,None]
872 return self._derivative_exog_helper(margeff, params, exog,
873 dummy_idx, count_idx, transform)
875 @Appender(DiscreteModel.fit.__doc__)
876 def fit(self, start_params=None, method='newton', maxiter=35,
877 full_output=1, disp=1, callback=None, **kwargs):
878 cntfit = super(CountModel, self).fit(start_params=start_params,
879 method=method, maxiter=maxiter, full_output=full_output,
880 disp=disp, callback=callback, **kwargs)
881 discretefit = CountResults(self, cntfit)
882 return CountResultsWrapper(discretefit)
884 @Appender(DiscreteModel.fit_regularized.__doc__)
885 def fit_regularized(self, start_params=None, method='l1',
886 maxiter='defined_by_method', full_output=1, disp=1, callback=None,
887 alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
888 qc_tol=0.03, **kwargs):
890 _validate_l1_method(method)
892 cntfit = super(CountModel, self).fit_regularized(
893 start_params=start_params, method=method, maxiter=maxiter,
894 full_output=full_output, disp=disp, callback=callback,
895 alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
896 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)
898 discretefit = L1CountResults(self, cntfit)
899 return L1CountResultsWrapper(discretefit)
902class OrderedModel(DiscreteModel):
903 pass
905# Public Model Classes
908class Poisson(CountModel):
909 __doc__ = """
910 Poisson Model
912 %(params)s
913 %(extra_params)s
915 Attributes
916 ----------
917 endog : ndarray
918 A reference to the endogenous response variable
919 exog : ndarray
920 A reference to the exogenous design.
921 """ % {'params' : base._model_params_doc,
922 'extra_params' :
923 """offset : array_like
924 Offset is added to the linear prediction with coefficient equal to 1.
925 exposure : array_like
926 Log(exposure) is added to the linear prediction with coefficient
927 equal to 1.
928 """ + base._missing_param_doc}
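    # Usage sketch (illustrative, not part of the upstream module); the randhie
    # dataset is just an example of a plain maximum likelihood Poisson fit:
    #
    #   >>> import statsmodels.api as sm
    #   >>> rand = sm.datasets.randhie.load_pandas()
    #   >>> X = sm.add_constant(rand.exog)
    #   >>> res = sm.Poisson(rand.endog, X).fit(disp=0)
    #   >>> res.summary()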
930 @property
931 def family(self):
932 from statsmodels.genmod import families
933 return families.Poisson()
935 def cdf(self, X):
936 """
937 Poisson model cumulative distribution function
939 Parameters
940 ----------
941 X : array_like
942 `X` is the linear predictor of the model. See notes.
944 Returns
945 -------
946 The value of the Poisson CDF at each point.
948 Notes
949 -----
950 The CDF is defined as
952 .. math:: \\exp\\left(-\\lambda\\right)\\sum_{i=0}^{y}\\frac{\\lambda^{i}}{i!}
954 where :math:`\\lambda` assumes the loglinear model. I.e.,
956 .. math:: \\ln\\lambda_{i}=X\\beta
958 The parameter `X` is :math:`X\\beta` in the above formula.
959 """
960 y = self.endog
961 return stats.poisson.cdf(y, np.exp(X))
963 def pdf(self, X):
964 """
965 Poisson model probability mass function
967 Parameters
968 ----------
969 X : array_like
970 `X` is the linear predictor of the model. See notes.
972 Returns
973 -------
974 pdf : ndarray
975 The value of the Poisson probability mass function, PMF, for each
976 point of X.
978 Notes
979 --------
980 The PMF is defined as
982 .. math:: \\frac{e^{-\\lambda_{i}}\\lambda_{i}^{y_{i}}}{y_{i}!}
984 where :math:`\\lambda` assumes the loglinear model. I.e.,
986 .. math:: \\ln\\lambda_{i}=x_{i}\\beta
988 The parameter `X` is :math:`x_{i}\\beta` in the above formula.
989 """
990 y = self.endog
991 return np.exp(stats.poisson.logpmf(y, np.exp(X)))
993 def loglike(self, params):
994 """
995 Loglikelihood of Poisson model
997 Parameters
998 ----------
999 params : array_like
1000 The parameters of the model.
1002 Returns
1003 -------
1004 loglike : float
1005 The log-likelihood function of the model evaluated at `params`.
1006 See notes.
1008 Notes
1009 --------
1010 .. math:: \\ln L=\\sum_{i=1}^{n}\\left[-\\lambda_{i}+y_{i}x_{i}^{\\prime}\\beta-\\ln y_{i}!\\right]
1011 """
1012 offset = getattr(self, "offset", 0)
1013 exposure = getattr(self, "exposure", 0)
1014 XB = np.dot(self.exog, params) + offset + exposure
1015 endog = self.endog
1016 return np.sum(-np.exp(XB) + endog*XB - gammaln(endog+1))
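    # Sanity-check sketch (illustrative, not from the upstream source): the
    # closed form above should agree with summing scipy's Poisson log-pmf at
    # lambda = exp(X beta), assuming no offset or exposure.  `model` and
    # `params` are assumptions for the example.
    #
    #   >>> import numpy as np
    #   >>> from scipy import stats
    #   >>> lam = np.exp(model.exog @ params)
    #   >>> np.allclose(model.loglike(params),
    #   ...             stats.poisson.logpmf(model.endog, lam).sum())
    #   True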
1018 def loglikeobs(self, params):
1019 """
1020 Loglikelihood for observations of Poisson model
1022 Parameters
1023 ----------
1024 params : array_like
1025 The parameters of the model.
1027 Returns
1028 -------
1029 loglike : array_like
1030 The log likelihood for each observation of the model evaluated
1031 at `params`. See Notes
1033 Notes
1034 --------
1035 .. math:: \\ln L_{i}=\\left[-\\lambda_{i}+y_{i}x_{i}^{\\prime}\\beta-\\ln y_{i}!\\right]
1037 for observations :math:`i=1,...,n`
1038 """
1039 offset = getattr(self, "offset", 0)
1040 exposure = getattr(self, "exposure", 0)
1041 XB = np.dot(self.exog, params) + offset + exposure
1042 endog = self.endog
1043 #np.sum(stats.poisson.logpmf(endog, np.exp(XB)))
1044 return -np.exp(XB) + endog*XB - gammaln(endog+1)
1046 @Appender(_get_start_params_null_docs)
1047 def _get_start_params_null(self):
1048 offset = getattr(self, "offset", 0)
1049 exposure = getattr(self, "exposure", 0)
1050 const = (self.endog / np.exp(offset + exposure)).mean()
1051 params = [np.log(const)]
1052 return params
1054 @Appender(DiscreteModel.fit.__doc__)
1055 def fit(self, start_params=None, method='newton', maxiter=35,
1056 full_output=1, disp=1, callback=None, **kwargs):
1058 if start_params is None and self.data.const_idx is not None:
1059 # k_params or k_exog not available?
1060 start_params = 0.001 * np.ones(self.exog.shape[1])
1061 start_params[self.data.const_idx] = self._get_start_params_null()[0]
1063 cntfit = super(CountModel, self).fit(start_params=start_params,
1064 method=method, maxiter=maxiter, full_output=full_output,
1065 disp=disp, callback=callback, **kwargs)
1067 if 'cov_type' in kwargs:
1068 cov_kwds = kwargs.get('cov_kwds', {})
1069 kwds = {'cov_type':kwargs['cov_type'], 'cov_kwds':cov_kwds}
1070 else:
1071 kwds = {}
1072 discretefit = PoissonResults(self, cntfit, **kwds)
1073 return PoissonResultsWrapper(discretefit)
1075 @Appender(DiscreteModel.fit_regularized.__doc__)
1076 def fit_regularized(self, start_params=None, method='l1',
1077 maxiter='defined_by_method', full_output=1, disp=1, callback=None,
1078 alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
1079 qc_tol=0.03, **kwargs):
1081 _validate_l1_method(method)
1083 cntfit = super(CountModel, self).fit_regularized(
1084 start_params=start_params, method=method, maxiter=maxiter,
1085 full_output=full_output, disp=disp, callback=callback,
1086 alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
1087 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)
1089 discretefit = L1PoissonResults(self, cntfit)
1090 return L1PoissonResultsWrapper(discretefit)
1092 def fit_constrained(self, constraints, start_params=None, **fit_kwds):
1093 """fit the model subject to linear equality constraints
1095 The constraints are of the form `R params = q`
1096 where R is the constraint_matrix and q is the vector of
1097 constraint_values.
1099 The estimation creates a new model with transformed design matrix,
1100 exog, and converts the results back to the original parameterization.
1102 Parameters
1103 ----------
1104 constraints : formula expression or tuple
1105 If it is a tuple, then the constraint needs to be given by two
1106 arrays (constraint_matrix, constraint_value), i.e. (R, q).
1107 Otherwise, the constraints can be given as strings or list of
1108 strings.
1109 see t_test for details
1110 start_params : None or array_like
1111 starting values for the optimization. `start_params` needs to be
1112 given in the original parameter space and are internally
1113 transformed.
1114 **fit_kwds : keyword arguments
1115 fit_kwds are used in the optimization of the transformed model.
1117 Returns
1118 -------
1119 results : Results instance
1120 """
1122 #constraints = (R, q)
1123 # TODO: temporary trailing underscore to not overwrite the monkey
1124 # patched version
1125 # TODO: decide whether to move the imports
1126 from patsy import DesignInfo
1127 from statsmodels.base._constraints import fit_constrained
1129 # same pattern as in base.LikelihoodModel.t_test
1130 lc = DesignInfo(self.exog_names).linear_constraint(constraints)
1131 R, q = lc.coefs, lc.constants
        # TODO: add start_params option, need access to transformation
        # fit_constrained needs to do the transformation
1135 params, cov, res_constr = fit_constrained(self, R, q,
1136 start_params=start_params,
1137 fit_kwds=fit_kwds)
1138 #create dummy results Instance, TODO: wire up properly
1139 res = self.fit(maxiter=0, method='nm', disp=0,
1140 warn_convergence=False) # we get a wrapper back
1141 res.mle_retvals['fcall'] = res_constr.mle_retvals.get('fcall', np.nan)
1142 res.mle_retvals['iterations'] = res_constr.mle_retvals.get(
1143 'iterations', np.nan)
1144 res.mle_retvals['converged'] = res_constr.mle_retvals['converged']
1145 res._results.params = params
1146 res._results.cov_params_default = cov
1147 cov_type = fit_kwds.get('cov_type', 'nonrobust')
1148 if cov_type != 'nonrobust':
1149 res._results.normalized_cov_params = cov # assume scale=1
1150 else:
1151 res._results.normalized_cov_params = None
1152 k_constr = len(q)
1153 res._results.df_resid += k_constr
1154 res._results.df_model -= k_constr
1155 res._results.constraints = lc
1156 res._results.k_constr = k_constr
1157 res._results.results_constrained = res_constr
1158 return res
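    # Usage sketch for fit_constrained (illustrative, not part of the upstream
    # module): constraints can be given as strings in terms of the exog names
    # or directly as (R, q).  `sm`, `y`, and `X` are assumptions, with `X`
    # having columns named 'const', 'x1', 'x2'.
    #
    #   >>> res = sm.Poisson(y, X).fit_constrained('x1 = x2')
    #   >>> res = sm.Poisson(y, X).fit_constrained(([[0, 1, -1]], [0]))  # same constraint as (R, q)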
1161 def score(self, params):
1162 """
1163 Poisson model score (gradient) vector of the log-likelihood
1165 Parameters
1166 ----------
1167 params : array_like
1168 The parameters of the model
1170 Returns
1171 -------
1172 score : ndarray, 1-D
1173 The score vector of the model, i.e. the first derivative of the
1174 loglikelihood function, evaluated at `params`
1176 Notes
1177 -----
1178 .. math:: \\frac{\\partial\\ln L}{\\partial\\beta}=\\sum_{i=1}^{n}\\left(y_{i}-\\lambda_{i}\\right)x_{i}
1180 where the loglinear model is assumed
1182 .. math:: \\ln\\lambda_{i}=x_{i}\\beta
1183 """
1184 offset = getattr(self, "offset", 0)
1185 exposure = getattr(self, "exposure", 0)
1186 X = self.exog
1187 L = np.exp(np.dot(X,params) + offset + exposure)
1188 return np.dot(self.endog - L, X)
1190 def score_obs(self, params):
1191 """
1192 Poisson model Jacobian of the log-likelihood for each observation
1194 Parameters
1195 ----------
1196 params : array_like
1197 The parameters of the model
1199 Returns
1200 -------
1201 score : array_like
1202 The score vector (nobs, k_vars) of the model evaluated at `params`
1204 Notes
1205 -----
1206 .. math:: \\frac{\\partial\\ln L_{i}}{\\partial\\beta}=\\left(y_{i}-\\lambda_{i}\\right)x_{i}
1208 for observations :math:`i=1,...,n`
1210 where the loglinear model is assumed
1212 .. math:: \\ln\\lambda_{i}=x_{i}\\beta
1213 """
1214 offset = getattr(self, "offset", 0)
1215 exposure = getattr(self, "exposure", 0)
1216 X = self.exog
1217 L = np.exp(np.dot(X,params) + offset + exposure)
1218 return (self.endog - L)[:,None] * X
1220 def score_factor(self, params):
1221 """
1222 Poisson model score_factor for each observation
1224 Parameters
1225 ----------
1226 params : array_like
1227 The parameters of the model
1229 Returns
1230 -------
1231 score : array_like
1232 The score factor (nobs, ) of the model evaluated at `params`
1234 Notes
1235 -----
1236 .. math:: \\frac{\\partial\\ln L_{i}}{\\partial\\beta}=\\left(y_{i}-\\lambda_{i}\\right)
1238 for observations :math:`i=1,...,n`
1240 where the loglinear model is assumed
1242 .. math:: \\ln\\lambda_{i}=x_{i}\\beta
1243 """
1244 offset = getattr(self, "offset", 0)
1245 exposure = getattr(self, "exposure", 0)
1246 X = self.exog
1247 L = np.exp(np.dot(X,params) + offset + exposure)
1248 return (self.endog - L)
1251 def hessian(self, params):
1252 """
1253 Poisson model Hessian matrix of the loglikelihood
1255 Parameters
1256 ----------
1257 params : array_like
1258 The parameters of the model
1260 Returns
1261 -------
1262 hess : ndarray, (k_vars, k_vars)
1263 The Hessian, second derivative of loglikelihood function,
1264 evaluated at `params`
1266 Notes
1267 -----
1268 .. math:: \\frac{\\partial^{2}\\ln L}{\\partial\\beta\\partial\\beta^{\\prime}}=-\\sum_{i=1}^{n}\\lambda_{i}x_{i}x_{i}^{\\prime}
1270 where the loglinear model is assumed
1272 .. math:: \\ln\\lambda_{i}=x_{i}\\beta
1273 """
1274 offset = getattr(self, "offset", 0)
1275 exposure = getattr(self, "exposure", 0)
1276 X = self.exog
1277 L = np.exp(np.dot(X,params) + exposure + offset)
1278 return -np.dot(L*X.T, X)
1280 def hessian_factor(self, params):
1281 """
1282 Poisson model Hessian factor
1284 Parameters
1285 ----------
1286 params : array_like
1287 The parameters of the model
1289 Returns
1290 -------
1291 hess : ndarray, (nobs,)
1292 The Hessian factor, second derivative of loglikelihood function
1293 with respect to the linear predictor evaluated at `params`
1295 Notes
1296 -----
1297 .. math:: \\frac{\\partial^{2}\\ln L}{\\partial\\beta\\partial\\beta^{\\prime}}=-\\sum_{i=1}^{n}\\lambda_{i}
1299 where the loglinear model is assumed
1301 .. math:: \\ln\\lambda_{i}=x_{i}\\beta
1302 """
1303 offset = getattr(self, "offset", 0)
1304 exposure = getattr(self, "exposure", 0)
1305 X = self.exog
1306 L = np.exp(np.dot(X,params) + exposure + offset)
1307 return L
1310class GeneralizedPoisson(CountModel):
1311 __doc__ = """
1312 Generalized Poisson Model
1314 %(params)s
1315 %(extra_params)s
1317 Attributes
1318 ----------
1319 endog : ndarray
1320 A reference to the endogenous response variable
1321 exog : ndarray
1322 A reference to the exogenous design.
1323 """ % {'params' : base._model_params_doc,
1324 'extra_params' :
1325 """
1326 p : scalar
1327 P denotes parameterizations for GP regression. p=1 for GP-1 and
1328 p=2 for GP-2. Default is p=1.
1329 offset : array_like
1330 Offset is added to the linear prediction with coefficient equal to 1.
1331 exposure : array_like
1332 Log(exposure) is added to the linear prediction with coefficient
1333 equal to 1.
1334 """ + base._missing_param_doc}
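    # Illustrative sketch (not from the upstream source): the same data can be
    # fit under either parameterization; p=1 gives GP-1 and p=2 gives GP-2, and
    # the extra dispersion parameter alpha is reported after the betas.
    # `y` and `X` are assumptions for the example.
    #
    #   >>> import statsmodels.api as sm
    #   >>> res_gp1 = sm.GeneralizedPoisson(y, X, p=1).fit(disp=0)
    #   >>> res_gp2 = sm.GeneralizedPoisson(y, X, p=2).fit(disp=0)
    #   >>> res_gp1.params[-1]  # estimated alpha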
1336 def __init__(self, endog, exog, p = 1, offset=None,
1337 exposure=None, missing='none', **kwargs):
1338 super(GeneralizedPoisson, self).__init__(endog, exog, offset=offset,
1339 exposure=exposure,
1340 missing=missing, **kwargs)
1341 self.parameterization = p - 1
1342 self.exog_names.append('alpha')
1343 self.k_extra = 1
1344 self._transparams = False
1346 def _get_init_kwds(self):
1347 kwds = super(GeneralizedPoisson, self)._get_init_kwds()
1348 kwds['p'] = self.parameterization + 1
1349 return kwds
1351 def loglike(self, params):
1352 """
1353 Loglikelihood of Generalized Poisson model
1355 Parameters
1356 ----------
1357 params : array_like
1358 The parameters of the model.
1360 Returns
1361 -------
1362 loglike : float
1363 The log-likelihood function of the model evaluated at `params`.
1364 See notes.
        Notes
        -----
        .. math:: \\ln L=\\sum_{i=1}^{n}\\left[\\ln\\mu_{i}+(y_{i}-1)\\ln(\\mu_{i}+
            \\alpha\\mu_{i}^{p-1}y_{i})-y_{i}\\ln(1+\\alpha\\mu_{i}^{p-1})-
            \\ln(y_{i}!)-\\frac{\\mu_{i}+\\alpha\\mu_{i}^{p-1}y_{i}}{1+\\alpha
            \\mu_{i}^{p-1}}\\right]
        """
1373 return np.sum(self.loglikeobs(params))
1375 def loglikeobs(self, params):
1376 """
1377 Loglikelihood for observations of Generalized Poisson model
1379 Parameters
1380 ----------
1381 params : array_like
1382 The parameters of the model.
1384 Returns
1385 -------
1386 loglike : ndarray
1387 The log likelihood for each observation of the model evaluated
1388 at `params`. See Notes
        Notes
        -----
        .. math:: \\ln L_{i}=\\ln\\mu_{i}+(y_{i}-1)\\ln(\\mu_{i}+
            \\alpha\\mu_{i}^{p-1}y_{i})-y_{i}\\ln(1+\\alpha\\mu_{i}^{p-1})-
            \\ln(y_{i}!)-\\frac{\\mu_{i}+\\alpha\\mu_{i}^{p-1}y_{i}}{1+\\alpha
            \\mu_{i}^{p-1}}

        for observations :math:`i=1,...,n`
        """
1399 if self._transparams:
1400 alpha = np.exp(params[-1])
1401 else:
1402 alpha = params[-1]
1403 params = params[:-1]
1404 p = self.parameterization
1405 endog = self.endog
1406 mu = self.predict(params)
1407 mu_p = np.power(mu, p)
1408 a1 = 1 + alpha * mu_p
1409 a2 = mu + (a1 - 1) * endog
1410 return (np.log(mu) + (endog - 1) * np.log(a2) - endog *
1411 np.log(a1) - gammaln(endog + 1) - a2 / a1)
1413 @Appender(_get_start_params_null_docs)
1414 def _get_start_params_null(self):
1415 offset = getattr(self, "offset", 0)
1416 exposure = getattr(self, "exposure", 0)
1418 const = (self.endog / np.exp(offset + exposure)).mean()
1419 params = [np.log(const)]
1420 mu = const * np.exp(offset + exposure)
1421 resid = self.endog - mu
1422 a = self._estimate_dispersion(mu, resid, df_resid=resid.shape[0] - 1)
1423 params.append(a)
1425 return np.array(params)
1427 def _estimate_dispersion(self, mu, resid, df_resid=None):
1428 q = self.parameterization
1429 if df_resid is None:
1430 df_resid = resid.shape[0]
1431 a = ((np.abs(resid) / np.sqrt(mu) - 1) * mu**(-q)).sum() / df_resid
1432 return a
1435 @Appender(
1436 """
        use_transparams : bool
            This parameter enables an internal transformation to impose
            non-negativity. True to enable. Default is False.
            use_transparams=True imposes the no-underdispersion (alpha > 0)
            constraint. If use_transparams=True and method="newton" or
            "ncg", the transformation is ignored.
        """)
1444 @Appender(DiscreteModel.fit.__doc__)
1445 def fit(self, start_params=None, method='bfgs', maxiter=35,
1446 full_output=1, disp=1, callback=None, use_transparams=False,
1447 cov_type='nonrobust', cov_kwds=None, use_t=None, **kwargs):
1448 if use_transparams and method not in ['newton', 'ncg']:
1449 self._transparams = True
1450 else:
1451 if use_transparams:
1452 warnings.warn('Parameter "use_transparams" is ignored',
1453 RuntimeWarning)
1454 self._transparams = False
1456 if start_params is None:
1457 offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
1458 if np.size(offset) == 1 and offset == 0:
1459 offset = None
1460 optim_kwds_prelim = {'disp': 0, 'skip_hessian': True,
1461 'warn_convergence': False}
1462 optim_kwds_prelim.update(kwargs.get('optim_kwds_prelim', {}))
1463 mod_poi = Poisson(self.endog, self.exog, offset=offset)
1464 res_poi = mod_poi.fit(**optim_kwds_prelim)
1465 start_params = res_poi.params
1466 a = self._estimate_dispersion(res_poi.predict(), res_poi.resid,
1467 df_resid=res_poi.df_resid)
1468 start_params = np.append(start_params, max(-0.1, a))
1470 if callback is None:
1471 # work around perfect separation callback #3895
1472 callback = lambda *x: x
1474 mlefit = super(GeneralizedPoisson, self).fit(start_params=start_params,
1475 maxiter=maxiter, method=method, disp=disp,
1476 full_output=full_output, callback=callback,
1477 **kwargs)
1479 if use_transparams and method not in ["newton", "ncg"]:
1480 self._transparams = False
1481 mlefit._results.params[-1] = np.exp(mlefit._results.params[-1])
1483 gpfit = GeneralizedPoissonResults(self, mlefit._results)
1484 result = GeneralizedPoissonResultsWrapper(gpfit)
1486 if cov_kwds is None:
1487 cov_kwds = {}
1489 result._get_robustcov_results(cov_type=cov_type,
1490 use_self=True, use_t=use_t, **cov_kwds)
1491 return result
1493 @Appender(DiscreteModel.fit_regularized.__doc__)
1494 def fit_regularized(self, start_params=None, method='l1',
1495 maxiter='defined_by_method', full_output=1, disp=1, callback=None,
1496 alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
1497 qc_tol=0.03, **kwargs):
1499 _validate_l1_method(method)
1501 if np.size(alpha) == 1 and alpha != 0:
1502 k_params = self.exog.shape[1] + self.k_extra
1503 alpha = alpha * np.ones(k_params)
1504 alpha[-1] = 0
1506 alpha_p = alpha[:-1] if (self.k_extra and np.size(alpha) > 1) else alpha
1507 self._transparams = False
1508 if start_params is None:
1509 offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
1510 if np.size(offset) == 1 and offset == 0:
1511 offset = None
1512 mod_poi = Poisson(self.endog, self.exog, offset=offset)
1513 start_params = mod_poi.fit_regularized(
1514 start_params=start_params, method=method, maxiter=maxiter,
1515 full_output=full_output, disp=0, callback=callback,
1516 alpha=alpha_p, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
1517 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs).params
1518 start_params = np.append(start_params, 0.1)
1520 cntfit = super(CountModel, self).fit_regularized(
1521 start_params=start_params, method=method, maxiter=maxiter,
1522 full_output=full_output, disp=disp, callback=callback,
1523 alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
1524 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)
1526 discretefit = L1GeneralizedPoissonResults(self, cntfit)
1527 return L1GeneralizedPoissonResultsWrapper(discretefit)
1529 def score_obs(self, params):
1530 if self._transparams:
1531 alpha = np.exp(params[-1])
1532 else:
1533 alpha = params[-1]
1535 params = params[:-1]
1536 p = self.parameterization
1537 exog = self.exog
1538 y = self.endog[:,None]
1539 mu = self.predict(params)[:,None]
1540 mu_p = np.power(mu, p)
1541 a1 = 1 + alpha * mu_p
1542 a2 = mu + alpha * mu_p * y
1543 a3 = alpha * p * mu ** (p - 1)
1544 a4 = a3 * y
1545 dmudb = mu * exog
1547 dalpha = (mu_p * (y * ((y - 1) / a2 - 2 / a1) + a2 / a1**2))
1548 dparams = dmudb * (-a4 / a1 +
1549 a3 * a2 / (a1 ** 2) +
1550 (1 + a4) * ((y - 1) / a2 - 1 / a1) +
1551 1 / mu)
1553 return np.concatenate((dparams, np.atleast_2d(dalpha)),
1554 axis=1)
    def score(self, params):
        score = np.sum(self.score_obs(params), axis=0)
        if self._transparams:
            score[-1] = score[-1] ** 2
            return score
        else:
            return score
1564 def _score_p(self, params):
1565 """
1566 Generalized Poisson model derivative of the log-likelihood by p-parameter
1568 Parameters
1569 ----------
1570 params : array_like
1571 The parameters of the model
1573 Returns
1574 -------
1575 dldp : float
1576 dldp is first derivative of the loglikelihood function,
1577 evaluated at `p-parameter`.
1578 """
1579 if self._transparams:
1580 alpha = np.exp(params[-1])
1581 else:
1582 alpha = params[-1]
1583 params = params[:-1]
1584 p = self.parameterization
1585 exog = self.exog
1586 y = self.endog[:,None]
1587 mu = self.predict(params)[:,None]
1588 mu_p = np.power(mu, p)
1589 a1 = 1 + alpha * mu_p
1590 a2 = mu + alpha * mu_p * y
1592 dp = np.sum((np.log(mu) * ((a2 - mu) * ((y - 1) / a2 - 2 / a1) +
1593 (a1 - 1) * a2 / a1 ** 2)))
1594 return dp
1596 def hessian(self, params):
1597 """
1598 Generalized Poisson model Hessian matrix of the loglikelihood
1600 Parameters
1601 ----------
1602 params : array_like
1603 The parameters of the model
1605 Returns
1606 -------
1607 hess : ndarray, (k_vars, k_vars)
1608 The Hessian, second derivative of loglikelihood function,
1609 evaluated at `params`
1610 """
1611 if self._transparams:
1612 alpha = np.exp(params[-1])
1613 else:
1614 alpha = params[-1]
1616 params = params[:-1]
1617 p = self.parameterization
1618 exog = self.exog
1619 y = self.endog[:,None]
1620 mu = self.predict(params)[:,None]
1621 mu_p = np.power(mu, p)
1622 a1 = 1 + alpha * mu_p
1623 a2 = mu + alpha * mu_p * y
1624 a3 = alpha * p * mu ** (p - 1)
1625 a4 = a3 * y
1626 a5 = p * mu ** (p - 1)
1627 dmudb = mu * exog
1629 # for dl/dparams dparams
1630 dim = exog.shape[1]
1631 hess_arr = np.empty((dim+1,dim+1))
1633 for i in range(dim):
1634 for j in range(i + 1):
1635 hess_arr[i,j] = np.sum(mu * exog[:,i,None] * exog[:,j,None] *
1636 (mu * (a3 * a4 / a1**2 -
1637 2 * a3**2 * a2 / a1**3 +
1638 2 * a3 * (a4 + 1) / a1**2 -
1639 a4 * p / (mu * a1) +
1640 a3 * p * a2 / (mu * a1**2) +
1641 (y - 1) * a4 * (p - 1) / (a2 * mu) -
1642 (y - 1) * (1 + a4)**2 / a2**2 -
1643 a4 * (p - 1) / (a1 * mu)) +
1644 ((y - 1) * (1 + a4) / a2 -
1645 (1 + a4) / a1)), axis=0)
1646 tri_idx = np.triu_indices(dim, k=1)
1647 hess_arr[tri_idx] = hess_arr.T[tri_idx]
1649 # for dl/dparams dalpha
1650 dldpda = np.sum((2 * a4 * mu_p / a1**2 -
1651 2 * a3 * mu_p * a2 / a1**3 -
1652 mu_p * y * (y - 1) * (1 + a4) / a2**2 +
1653 mu_p * (1 + a4) / a1**2 +
1654 a5 * y * (y - 1) / a2 -
1655 2 * a5 * y / a1 +
1656 a5 * a2 / a1**2) * dmudb,
1657 axis=0)
1659 hess_arr[-1,:-1] = dldpda
1660 hess_arr[:-1,-1] = dldpda
1662 # for dl/dalpha dalpha
1663 dldada = mu_p**2 * (3 * y / a1**2 -
1664 (y / a2)**2. * (y - 1) -
1665 2 * a2 / a1**3)
1667 hess_arr[-1,-1] = dldada.sum()
1669 return hess_arr
1671 def predict(self, params, exog=None, exposure=None, offset=None,
1672 which='mean'):
1673 """
1674 Predict response variable of a count model given exogenous variables.
1676 Notes
1677 -----
1678 If exposure is specified, then it will be logged by the method.
1679 The user does not need to log it first.
1680 """
1681 if exog is None:
1682 exog = self.exog
1684 if exposure is None:
1685 exposure = getattr(self, 'exposure', 0)
1686 elif exposure != 0:
1687 exposure = np.log(exposure)
1689 if offset is None:
1690 offset = getattr(self, 'offset', 0)
1692 fitted = np.dot(exog, params[:exog.shape[1]])
1693 linpred = fitted + exposure + offset
1695 if which == 'mean':
1696 return np.exp(linpred)
1697 elif which == 'linear':
1698 return linpred
1699 elif which =='prob':
1700 counts = np.atleast_2d(np.arange(0, np.max(self.endog)+1))
1701 mu = self.predict(params, exog=exog, exposure=exposure,
1702 offset=offset)[:,None]
1703 return genpoisson_p.pmf(counts, mu, params[-1],
1704 self.parameterization + 1)
1705 else:
1706 raise ValueError('keyword \'which\' not recognized')
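# Illustrative sketch (not part of the upstream module): with which='prob' the
# predict method above returns, for each observation, the generalized Poisson
# probability of every count from 0 to max(endog).  `res` is a fitted
# GeneralizedPoisson results instance, an assumption for the example.
#
#   >>> probs = res.model.predict(res.params, which='prob')
#   >>> probs.shape          # (nobs, max(endog) + 1)
#   >>> probs.sum(axis=1)    # close to 1 per row when max(endog) is large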
1709class Logit(BinaryModel):
1710 __doc__ = """
1711 Logit Model
1713 %(params)s
1714 %(extra_params)s
1716 Attributes
1717 ----------
1718 endog : ndarray
1719 A reference to the endogenous response variable
1720 exog : ndarray
1721 A reference to the exogenous design.
1722 """ % {'params' : base._model_params_doc,
1723 'extra_params' : base._missing_param_doc}
1725 def cdf(self, X):
1726 """
1727 The logistic cumulative distribution function
1729 Parameters
1730 ----------
1731 X : array_like
1732 `X` is the linear predictor of the logit model. See notes.
1734 Returns
1735 -------
1736 1/(1 + exp(-X))
1738 Notes
1739 -----
1740 In the logit model,
1742 .. math:: \\Lambda\\left(x^{\\prime}\\beta\\right)=
1743 \\text{Prob}\\left(Y=1|x\\right)=
1744 \\frac{e^{x^{\\prime}\\beta}}{1+e^{x^{\\prime}\\beta}}
1745 """
1746 X = np.asarray(X)
1747 return 1/(1+np.exp(-X))
1749 def pdf(self, X):
1750 """
1751 The logistic probability density function
1753 Parameters
1754 ----------
1755 X : array_like
1756 `X` is the linear predictor of the logit model. See notes.
1758 Returns
1759 -------
        pdf : ndarray
            The value of the Logit probability density function, PDF, for each
            point of X. ``np.exp(-X)/(1+np.exp(-X))**2``
1764 Notes
1765 -----
1766 In the logit model,
1768 .. math:: \\lambda\\left(x^{\\prime}\\beta\\right)=\\frac{e^{-x^{\\prime}\\beta}}{\\left(1+e^{-x^{\\prime}\\beta}\\right)^{2}}
1769 """
1770 X = np.asarray(X)
1771 return np.exp(-X)/(1+np.exp(-X))**2
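    # Quick numeric sketch (illustrative, not from the upstream source): the
    # logistic cdf is 0.5 at 0, and pdf matches the numerical derivative of cdf.
    #
    #   >>> import numpy as np
    #   >>> m = Logit(np.array([0, 1]), np.ones((2, 1)))  # dummy instance
    #   >>> m.cdf(0.0)
    #   0.5
    #   >>> h = 1e-6
    #   >>> np.allclose(m.pdf(1.3), (m.cdf(1.3 + h) - m.cdf(1.3 - h)) / (2 * h))
    #   True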
1773 def loglike(self, params):
1774 """
1775 Log-likelihood of logit model.
1777 Parameters
1778 ----------
1779 params : array_like
1780 The parameters of the logit model.
1782 Returns
1783 -------
1784 loglike : float
1785 The log-likelihood function of the model evaluated at `params`.
1786 See notes.
1788 Notes
1789 -----
1790 .. math::
1792 \\ln L=\\sum_{i}\\ln\\Lambda
1793 \\left(q_{i}x_{i}^{\\prime}\\beta\\right)
1795 Where :math:`q=2y-1`. This simplification comes from the fact that the
1796 logistic distribution is symmetric.
1797 """
1798 q = 2*self.endog - 1
1799 X = self.exog
1800 return np.sum(np.log(self.cdf(q*np.dot(X,params))))
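# Illustrative check (editor's sketch, not part of this module): because the
# logistic CDF is symmetric, sum(log L(q * X @ b)) with q = 2y - 1 equals the
# usual Bernoulli form sum(y*log(p) + (1-y)*log(1-p)).
import numpy as np

rng = np.random.default_rng(0)
X = np.column_stack([np.ones(50), rng.normal(size=50)])
beta = np.array([0.3, -0.8])
y = rng.integers(0, 2, size=50)

p = 1.0 / (1.0 + np.exp(-X @ beta))
q = 2 * y - 1
ll_sym = np.sum(np.log(1.0 / (1.0 + np.exp(-q * (X @ beta)))))
ll_bern = np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))
assert np.allclose(ll_sym, ll_bern)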
1802 def loglikeobs(self, params):
1803 """
1804 Log-likelihood of logit model for each observation.
1806 Parameters
1807 ----------
1808 params : array_like
1809 The parameters of the logit model.
1811 Returns
1812 -------
1813 loglike : ndarray
1814 The log likelihood for each observation of the model evaluated
1815 at `params`. See Notes
1817 Notes
1818 -----
1819 .. math::
1821 \\ln L=\\sum_{i}\\ln\\Lambda
1822 \\left(q_{i}x_{i}^{\\prime}\\beta\\right)
1824 for observations :math:`i=1,...,n`
1826 where :math:`q=2y-1`. This simplification comes from the fact that the
1827 logistic distribution is symmetric.
1828 """
1829 q = 2*self.endog - 1
1830 X = self.exog
1831 return np.log(self.cdf(q*np.dot(X,params)))
1833 def score(self, params):
1834 """
1835 Logit model score (gradient) vector of the log-likelihood
1837 Parameters
1838 ----------
1839 params : array_like
1840 The parameters of the model
1842 Returns
1843 -------
1844 score : ndarray, 1-D
1845 The score vector of the model, i.e. the first derivative of the
1846 loglikelihood function, evaluated at `params`
1848 Notes
1849 -----
1850 .. math:: \\frac{\\partial\\ln L}{\\partial\\beta}=\\sum_{i=1}^{n}\\left(y_{i}-\\Lambda_{i}\\right)x_{i}
1851 """
1853 y = self.endog
1854 X = self.exog
1855 L = self.cdf(np.dot(X,params))
1856 return np.dot(y - L,X)
1858 def score_obs(self, params):
1859 """
1860 Logit model Jacobian of the log-likelihood for each observation
1862 Parameters
1863 ----------
1864 params : array_like
1865 The parameters of the model
1867 Returns
1868 -------
1869 jac : array_like
1870 The derivative of the loglikelihood for each observation evaluated
1871 at `params`.
1873 Notes
1874 -----
1875 .. math:: \\frac{\\partial\\ln L_{i}}{\\partial\\beta}=\\left(y_{i}-\\Lambda_{i}\\right)x_{i}
1877 for observations :math:`i=1,...,n`
1878 """
1880 y = self.endog
1881 X = self.exog
1882 L = self.cdf(np.dot(X, params))
1883 return (y - L)[:,None] * X
1885 def hessian(self, params):
1886 """
1887 Logit model Hessian matrix of the log-likelihood
1889 Parameters
1890 ----------
1891 params : array_like
1892 The parameters of the model
1894 Returns
1895 -------
1896 hess : ndarray, (k_vars, k_vars)
1897 The Hessian, second derivative of loglikelihood function,
1898 evaluated at `params`
1900 Notes
1901 -----
1902 .. math:: \\frac{\\partial^{2}\\ln L}{\\partial\\beta\\partial\\beta^{\\prime}}=-\\sum_{i}\\Lambda_{i}\\left(1-\\Lambda_{i}\\right)x_{i}x_{i}^{\\prime}
1903 """
1904 X = self.exog
1905 L = self.cdf(np.dot(X,params))
1906 return -np.dot(L*(1-L)*X.T,X)
1908 @Appender(DiscreteModel.fit.__doc__)
1909 def fit(self, start_params=None, method='newton', maxiter=35,
1910 full_output=1, disp=1, callback=None, **kwargs):
1911 bnryfit = super(Logit, self).fit(start_params=start_params,
1912 method=method, maxiter=maxiter, full_output=full_output,
1913 disp=disp, callback=callback, **kwargs)
1915 discretefit = LogitResults(self, bnryfit)
1916 return BinaryResultsWrapper(discretefit)
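# Minimal usage sketch (editor's illustration, not part of this module;
# assumes statsmodels is installed). spector is a small example dataset
# shipped with statsmodels.
import statsmodels.api as sm

spector = sm.datasets.spector.load_pandas()
exog = sm.add_constant(spector.exog)
res = sm.Logit(spector.endog, exog).fit(disp=0)
print(res.summary())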
1919class Probit(BinaryModel):
1920 __doc__ = """
1921 Probit Model
1923 %(params)s
1924 %(extra_params)s
1926 Attributes
1927 ----------
1928 endog : ndarray
1929 A reference to the endogenous response variable
1930 exog : ndarray
1931 A reference to the exogenous design.
1932 """ % {'params' : base._model_params_doc,
1933 'extra_params' : base._missing_param_doc}
1935 def cdf(self, X):
1936 """
1937 Probit (Normal) cumulative distribution function
1939 Parameters
1940 ----------
1941 X : array_like
1942 The linear predictor of the model (XB).
1944 Returns
1945 -------
1946 cdf : ndarray
1947 The cdf evaluated at `X`.
1949 Notes
1950 -----
1951 This function is just an alias for scipy.stats.norm.cdf
1952 """
1953 return stats.norm._cdf(X)
1955 def pdf(self, X):
1956 """
1957 Probit (Normal) probability density function
1959 Parameters
1960 ----------
1961 X : array_like
1962 The linear predictor of the model (XB).
1964 Returns
1965 -------
1966 pdf : ndarray
1967 The value of the normal density function for each point of X.
1969 Notes
1970 -----
1971 This function is just an alias for scipy.stats.norm.pdf
1972 """
1973 X = np.asarray(X)
1974 return stats.norm._pdf(X)
1977 def loglike(self, params):
1978 """
1979 Log-likelihood of probit model (i.e., the normal distribution).
1981 Parameters
1982 ----------
1983 params : array_like
1984 The parameters of the model.
1986 Returns
1987 -------
1988 loglike : float
1989 The log-likelihood function of the model evaluated at `params`.
1990 See notes.
1992 Notes
1993 -----
1994 .. math:: \\ln L=\\sum_{i}\\ln\\Phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)
1996 Where :math:`q=2y-1`. This simplification comes from the fact that the
1997 normal distribution is symmetric.
1998 """
2000 q = 2*self.endog - 1
2001 X = self.exog
2002 return np.sum(np.log(np.clip(self.cdf(q*np.dot(X,params)),
2003 FLOAT_EPS, 1)))
2005 def loglikeobs(self, params):
2006 """
2007 Log-likelihood of probit model for each observation
2009 Parameters
2010 ----------
2011 params : array_like
2012 The parameters of the model.
2014 Returns
2015 -------
2016 loglike : array_like
2017 The log likelihood for each observation of the model evaluated
2018 at `params`. See Notes
2020 Notes
2021 -----
2022 .. math:: \\ln L_{i}=\\ln\\Phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)
2024 for observations :math:`i=1,...,n`
2026 where :math:`q=2y-1`. This simplification comes from the fact that the
2027 normal distribution is symmetric.
2028 """
2030 q = 2*self.endog - 1
2031 X = self.exog
2032 return np.log(np.clip(self.cdf(q*np.dot(X,params)), FLOAT_EPS, 1))
2035 def score(self, params):
2036 """
2037 Probit model score (gradient) vector
2039 Parameters
2040 ----------
2041 params : array_like
2042 The parameters of the model
2044 Returns
2045 -------
2046 score : ndarray, 1-D
2047 The score vector of the model, i.e. the first derivative of the
2048 loglikelihood function, evaluated at `params`
2050 Notes
2051 -----
2052 .. math:: \\frac{\\partial\\ln L}{\\partial\\beta}=\\sum_{i=1}^{n}\\left[\\frac{q_{i}\\phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}{\\Phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}\\right]x_{i}
2054 Where :math:`q=2y-1`. This simplification comes from the fact that the
2055 normal distribution is symmetric.
2056 """
2057 y = self.endog
2058 X = self.exog
2059 XB = np.dot(X,params)
2060 q = 2*y - 1
2061 # clip to get rid of invalid divide complaint
2062 L = q*self.pdf(q*XB)/np.clip(self.cdf(q*XB), FLOAT_EPS, 1 - FLOAT_EPS)
2063 return np.dot(L,X)
2065 def score_obs(self, params):
2066 """
2067 Probit model Jacobian for each observation
2069 Parameters
2070 ----------
2071 params : array_like
2072 The parameters of the model
2074 Returns
2075 -------
2076 jac : array_like
2077 The derivative of the loglikelihood for each observation evaluated
2078 at `params`.
2080 Notes
2081 -----
2082 .. math:: \\frac{\\partial\\ln L_{i}}{\\partial\\beta}=\\left[\\frac{q_{i}\\phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}{\\Phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}\\right]x_{i}
2084 for observations :math:`i=1,...,n`
2086 Where :math:`q=2y-1`. This simplification comes from the fact that the
2087 normal distribution is symmetric.
2088 """
2089 y = self.endog
2090 X = self.exog
2091 XB = np.dot(X,params)
2092 q = 2*y - 1
2093 # clip to get rid of invalid divide complaint
2094 L = q*self.pdf(q*XB)/np.clip(self.cdf(q*XB), FLOAT_EPS, 1 - FLOAT_EPS)
2095 return L[:,None] * X
2097 def hessian(self, params):
2098 """
2099 Probit model Hessian matrix of the log-likelihood
2101 Parameters
2102 ----------
2103 params : array_like
2104 The parameters of the model
2106 Returns
2107 -------
2108 hess : ndarray, (k_vars, k_vars)
2109 The Hessian, second derivative of loglikelihood function,
2110 evaluated at `params`
2112 Notes
2113 -----
2114 .. math:: \\frac{\\partial^{2}\\ln L}{\\partial\\beta\\partial\\beta^{\\prime}}=-\\lambda_{i}\\left(\\lambda_{i}+x_{i}^{\\prime}\\beta\\right)x_{i}x_{i}^{\\prime}
2116 where
2118 .. math:: \\lambda_{i}=\\frac{q_{i}\\phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}{\\Phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}
2120 and :math:`q=2y-1`
2121 """
2122 X = self.exog
2123 XB = np.dot(X,params)
2124 q = 2*self.endog - 1
2125 L = q*self.pdf(q*XB)/self.cdf(q*XB)
2126 return np.dot(-L*(L+XB)*X.T,X)
2128 @Appender(DiscreteModel.fit.__doc__)
2129 def fit(self, start_params=None, method='newton', maxiter=35,
2130 full_output=1, disp=1, callback=None, **kwargs):
2131 bnryfit = super(Probit, self).fit(start_params=start_params,
2132 method=method, maxiter=maxiter, full_output=full_output,
2133 disp=disp, callback=callback, **kwargs)
2134 discretefit = ProbitResults(self, bnryfit)
2135 return BinaryResultsWrapper(discretefit)
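# Illustrative check (editor's sketch, not part of this module): the analytic
# Probit score sum_i q_i*phi(q_i x_i'b)/Phi(q_i x_i'b) * x_i agrees with a
# numerical gradient of the log-likelihood.
import numpy as np
from scipy import stats, optimize

rng = np.random.default_rng(1)
X = np.column_stack([np.ones(80), rng.normal(size=80)])
y = rng.integers(0, 2, size=80)
beta = np.array([0.1, 0.4])
q = 2 * y - 1

loglike = lambda b: np.sum(stats.norm.logcdf(q * (X @ b)))
lam = q * stats.norm.pdf(q * (X @ beta)) / stats.norm.cdf(q * (X @ beta))
analytic = (lam[:, None] * X).sum(0)
numeric = optimize.approx_fprime(beta, loglike, 1e-6)
assert np.allclose(analytic, numeric, atol=1e-3)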
2138class MNLogit(MultinomialModel):
2139 __doc__ = """
2140 Multinomial Logit Model
2142 Parameters
2143 ----------
2144 endog : array_like
2145 `endog` is a 1-d vector of the endogenous response. `endog` can
2146 contain strings, ints, or floats or may be a pandas Categorical Series.
2147 Note that if it contains strings, every distinct string will be a
2148 category. No stripping of whitespace is done.
2149 exog : array_like
2150 A nobs x k array where `nobs` is the number of observations and `k`
2151 is the number of regressors. An intercept is not included by default
2152 and should be added by the user. See `statsmodels.tools.add_constant`.
2153 %(extra_params)s
2155 Attributes
2156 ----------
2157 endog : ndarray
2158 A reference to the endogenous response variable
2159 exog : ndarray
2160 A reference to the exogenous design.
2161 J : float
2162 The number of choices for the endogenous variable. Note that this
2163 is zero-indexed.
2164 K : float
2165 The actual number of parameters for the exogenous design. Includes
2166 the constant if the design has one.
2167 names : dict
2168 A dictionary mapping the column number in `wendog` to the variables
2169 in `endog`.
2170 wendog : ndarray
2171 An n x j array where j is the number of unique categories in `endog`.
2172 Each column of j is a dummy variable indicating the category of
2173 each observation. See `names` for a dictionary mapping each column to
2174 its category.
2176 Notes
2177 -----
2178 See developer notes for further information on `MNLogit` internals.
2179 """ % {'extra_params': base._missing_param_doc}
2181 def __init__(self, endog, exog, **kwargs):
2182 super(MNLogit, self).__init__(endog, exog, **kwargs)
2184 # Override cov_names since multivariate model
2185 yname = self.endog_names
2186 ynames = self._ynames_map
2187 ynames = MultinomialResults._maybe_convert_ynames_int(ynames)
2188 # use range below to ensure sortedness
2189 ynames = [ynames[key] for key in range(int(self.J))]
2190 idx = MultiIndex.from_product((ynames[1:], self.data.xnames),
2191 names=(yname, None))
2192 self.data.cov_names = idx
2194 def pdf(self, eXB):
2195 """
2196 NotImplemented
2197 """
2198 raise NotImplementedError
2200 def cdf(self, X):
2201 """
2202 Multinomial logit cumulative distribution function.
2204 Parameters
2205 ----------
2206 X : ndarray
2207 The linear predictor of the model XB.
2209 Returns
2210 -------
2211 cdf : ndarray
2212 The cdf evaluated at `X`.
2214 Notes
2215 -----
2216 In the multinomial logit model.
2217 .. math:: \\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)}{\\sum_{k=0}^{J}\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}
2218 """
2219 eXB = np.column_stack((np.ones(len(X)), np.exp(X)))
2220 return eXB/eXB.sum(1)[:,None]
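# Sketch of the computation above (editor's illustration, not part of this
# module): the base category gets a zero linear predictor, hence the
# prepended column of ones after exponentiation; rows of the result sum to 1.
import numpy as np

XB = np.array([[0.2, -1.0],
               [1.5, 0.3]])              # 2 observations, J = 3 categories
eXB = np.column_stack((np.ones(len(XB)), np.exp(XB)))
probs = eXB / eXB.sum(1)[:, None]
print(probs.sum(1))                      # [1. 1.]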
2222 def loglike(self, params):
2223 """
2224 Log-likelihood of the multinomial logit model.
2226 Parameters
2227 ----------
2228 params : array_like
2229 The parameters of the multinomial logit model.
2231 Returns
2232 -------
2233 loglike : float
2234 The log-likelihood function of the model evaluated at `params`.
2235 See notes.
2237 Notes
2238 -----
2239 .. math::
2241 \\ln L=\\sum_{i=1}^{n}\\sum_{j=0}^{J}d_{ij}\\ln
2242 \\left(\\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)}
2243 {\\sum_{k=0}^{J}
2244 \\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\right)
2246 where :math:`d_{ij}=1` if individual `i` chose alternative `j` and 0
2247 if not.
2248 """
2249 params = params.reshape(self.K, -1, order='F')
2250 d = self.wendog
2251 logprob = np.log(self.cdf(np.dot(self.exog,params)))
2252 return np.sum(d * logprob)
2254 def loglikeobs(self, params):
2255 """
2256 Log-likelihood of the multinomial logit model for each observation.
2258 Parameters
2259 ----------
2260 params : array_like
2261 The parameters of the multinomial logit model.
2263 Returns
2264 -------
2265 loglike : array_like
2266 The log likelihood for each observation of the model evaluated
2267 at `params`. See Notes
2269 Notes
2270 -----
2271 .. math::
2273 \\ln L_{i}=\\sum_{j=0}^{J}d_{ij}\\ln
2274 \\left(\\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)}
2275 {\\sum_{k=0}^{J}
2276 \\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\right)
2278 for observations :math:`i=1,...,n`
2280 where :math:`d_{ij}=1` if individual `i` chose alternative `j` and 0
2281 if not.
2282 """
2283 params = params.reshape(self.K, -1, order='F')
2284 d = self.wendog
2285 logprob = np.log(self.cdf(np.dot(self.exog,params)))
2286 return d * logprob
2288 def score(self, params):
2289 """
2290 Score matrix for multinomial logit model log-likelihood
2292 Parameters
2293 ----------
2294 params : ndarray
2295 The parameters of the multinomial logit model.
2297 Returns
2298 -------
2299 score : ndarray, (K * (J-1),)
2300 The flattened score vector, i.e. the first derivative of the
2301 loglikelihood function, of the multinomial logit model evaluated at
2302 `params`.
2304 Notes
2305 -----
2306 .. math:: \\frac{\\partial\\ln L}{\\partial\\beta_{j}}=\\sum_{i}\\left(d_{ij}-\\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)}{\\sum_{k=0}^{J}\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\right)x_{i}
2308 for :math:`j=1,...,J`
2310 In the multinomial model the score matrix is K x (J-1) but is returned
2311 as a flattened array to work with the solvers.
2312 """
2313 params = params.reshape(self.K, -1, order='F')
2314 firstterm = self.wendog[:,1:] - self.cdf(np.dot(self.exog,
2315 params))[:,1:]
2316 #NOTE: might need to switch terms if params is reshaped
2317 return np.dot(firstterm.T, self.exog).flatten()
2319 def loglike_and_score(self, params):
2320 """
2321 Returns log likelihood and score, efficiently reusing calculations.
2323 Note that both of these returned quantities will need to be negated
2324 before being minimized by the maximum likelihood fitting machinery.
2325 """
2326 params = params.reshape(self.K, -1, order='F')
2327 cdf_dot_exog_params = self.cdf(np.dot(self.exog, params))
2328 loglike_value = np.sum(self.wendog * np.log(cdf_dot_exog_params))
2329 firstterm = self.wendog[:, 1:] - cdf_dot_exog_params[:, 1:]
2330 score_array = np.dot(firstterm.T, self.exog).flatten()
2331 return loglike_value, score_array
2333 def score_obs(self, params):
2334 """
2335 Jacobian matrix for multinomial logit model log-likelihood
2337 Parameters
2338 ----------
2339 params : ndarray
2340 The parameters of the multinomial logit model.
2342 Returns
2343 -------
2344 jac : array_like
2345 The derivative of the loglikelihood for each observation evaluated
2346 at `params` .
2348 Notes
2349 -----
2350 .. math:: \\frac{\\partial\\ln L_{i}}{\\partial\\beta_{j}}=\\left(d_{ij}-\\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)}{\\sum_{k=0}^{J}\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\right)x_{i}
2352 for :math:`j=1,...,J`, for observations :math:`i=1,...,n`
2354 In the multinomial model the score vector is K x (J-1) but is returned
2355 as a flattened array. The Jacobian has the observations in rows and
2356 the flattened array of derivatives in columns.
2357 """
2358 params = params.reshape(self.K, -1, order='F')
2359 firstterm = self.wendog[:,1:] - self.cdf(np.dot(self.exog,
2360 params))[:,1:]
2361 #NOTE: might need to switch terms if params is reshaped
2362 return (firstterm[:,:,None] * self.exog[:,None,:]).reshape(self.exog.shape[0], -1)
2364 def hessian(self, params):
2365 """
2366 Multinomial logit Hessian matrix of the log-likelihood
2368 Parameters
2369 ----------
2370 params : array_like
2371 The parameters of the model
2373 Returns
2374 -------
2375 hess : ndarray, (J*K, J*K)
2376 The Hessian, second derivative of loglikelihood function with
2377 respect to the flattened parameters, evaluated at `params`
2379 Notes
2380 -----
2381 .. math:: \\frac{\\partial^{2}\\ln L}{\\partial\\beta_{j}\\partial\\beta_{l}}=-\\sum_{i=1}^{n}\\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)}{\\sum_{k=0}^{J}\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\left[\\boldsymbol{1}\\left(j=l\\right)-\\frac{\\exp\\left(\\beta_{l}^{\\prime}x_{i}\\right)}{\\sum_{k=0}^{J}\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\right]x_{i}x_{l}^{\\prime}
2383 where
2384 :math:`\\boldsymbol{1}\\left(j=l\\right)` equals 1 if `j` = `l` and 0
2385 otherwise.
2387 The actual Hessian matrix has J**2 * K x K elements. Our Hessian
2388 is reshaped to be square (J*K, J*K) so that the solvers can use it.
2390 This implementation does not take advantage of the symmetry of
2391 the Hessian and could probably be refactored for speed.
2392 """
2393 params = params.reshape(self.K, -1, order='F')
2394 X = self.exog
2395 pr = self.cdf(np.dot(X,params))
2396 partials = []
2397 J = self.J
2398 K = self.K
2399 for i in range(J-1):
2400 for j in range(J-1): # this loop assumes we drop the first col.
2401 if i == j:
2402 partials.append(\
2403 -np.dot(((pr[:,i+1]*(1-pr[:,j+1]))[:,None]*X).T,X))
2404 else:
2405 partials.append(-np.dot(((pr[:,i+1]*-pr[:,j+1])[:,None]*X).T,X))
2406 H = np.array(partials)
2407 # the developer's notes on multinomial should clear this math up
2408 H = np.transpose(H.reshape(J-1, J-1, K, K), (0, 2, 1, 3)).reshape((J-1)*K, (J-1)*K)
2409 return H
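# Minimal usage sketch (editor's illustration, not part of this module;
# assumes statsmodels is installed). anes96 is an example dataset shipped
# with statsmodels with a 7-category response.
import statsmodels.api as sm

anes = sm.datasets.anes96.load_pandas()
exog = sm.add_constant(anes.exog[['age', 'educ', 'logpopul']])
res = sm.MNLogit(anes.endog, exog).fit(disp=0)
print(res.params.shape)    # (K, J - 1): one column per non-base category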
2412#TODO: Weibull can be replaced by a survival analysis function
2413# like Stata's streg (the Cox model as well)
2414#class Weibull(DiscreteModel):
2415# """
2416# Binary choice Weibull model
2417#
2418# Notes
2419# ------
2420# This is unfinished and untested.
2421# """
2422##TODO: add analytic hessian for Weibull
2423# def initialize(self):
2424# pass
2425#
2426# def cdf(self, X):
2427# """
2428# Gumbel (Log Weibull) cumulative distribution function
2429# """
2430## return np.exp(-np.exp(-X))
2431# return stats.gumbel_r.cdf(X)
2432# # these two are equivalent.
2433# # Greene table and discussion is incorrect.
2434#
2435# def pdf(self, X):
2436# """
2437# Gumbel (Log Weibull) probability density function
2438# """
2439# return stats.gumbel_r.pdf(X)
2440#
2441# def loglike(self, params):
2442# """
2443# Loglikelihood of Weibull distribution
2444# """
2445# X = self.exog
2446# cdf = self.cdf(np.dot(X,params))
2447# y = self.endog
2448# return np.sum(y*np.log(cdf) + (1-y)*np.log(1-cdf))
2449#
2450# def score(self, params):
2451# y = self.endog
2452# X = self.exog
2453# F = self.cdf(np.dot(X,params))
2454# f = self.pdf(np.dot(X,params))
2455# term = (y*f/F + (1 - y)*-f/(1-F))
2456# return np.dot(term,X)
2457#
2458# def hessian(self, params):
2459# hess = nd.Jacobian(self.score)
2460# return hess(params)
2461#
2462# def fit(self, start_params=None, method='newton', maxiter=35, tol=1e-08):
2463## The example had problems with all zero start values, Hessian = 0
2464# if start_params is None:
2465# start_params = OLS(self.endog, self.exog).fit().params
2466# mlefit = super(Weibull, self).fit(start_params=start_params,
2467# method=method, maxiter=maxiter, tol=tol)
2468# return mlefit
2469#
2472class NegativeBinomial(CountModel):
2473 __doc__ = """
2474 Negative Binomial Model
2476 %(params)s
2477 %(extra_params)s
2479 Attributes
2480 ----------
2481 endog : ndarray
2482 A reference to the endogenous response variable
2483 exog : ndarray
2484 A reference to the exogenous design.
2486 References
2487 ----------
2488 Greene, W. 2008. "Functional forms for the negative binomial model
2489 for count data". Economics Letters. Volume 99, Number 3, pp.585-590.
2490 Hilbe, J.M. 2011. "Negative binomial regression". Cambridge University
2491 Press.
2492 """ % {'params': base._model_params_doc,
2493 'extra_params':
2494 """loglike_method : str
2495 Log-likelihood type. 'nb2','nb1', or 'geometric'.
2496 Fitted value :math:`\\mu`
2497 Heterogeneity parameter :math:`\\alpha`
2499 - nb2: Variance equal to :math:`\\mu + \\alpha\\mu^2` (most common)
2500 - nb1: Variance equal to :math:`\\mu + \\alpha\\mu`
2501 - geometric: Variance equal to :math:`\\mu + \\mu^2`
2502 offset : array_like
2503 Offset is added to the linear prediction with coefficient equal to 1.
2504 exposure : array_like
2505 Log(exposure) is added to the linear prediction with coefficient
2506 equal to 1.
2507 """ + base._missing_param_doc}
2508 def __init__(self, endog, exog, loglike_method='nb2', offset=None,
2509 exposure=None, missing='none', **kwargs):
2510 super(NegativeBinomial, self).__init__(endog, exog, offset=offset,
2511 exposure=exposure,
2512 missing=missing, **kwargs)
2513 self.loglike_method = loglike_method
2514 self._initialize()
2515 if loglike_method in ['nb2', 'nb1']:
2516 self.exog_names.append('alpha')
2517 self.k_extra = 1
2518 else:
2519 self.k_extra = 0
2520 # store keys for extras if we need to recreate model instance
2521 # we need to append keys that do not go to super
2522 self._init_keys.append('loglike_method')
2524 def _initialize(self):
2525 if self.loglike_method == 'nb2':
2526 self.hessian = self._hessian_nb2
2527 self.score = self._score_nbin
2528 self.loglikeobs = self._ll_nb2
2529 self._transparams = True # transform lnalpha -> alpha in fit
2530 elif self.loglike_method == 'nb1':
2531 self.hessian = self._hessian_nb1
2532 self.score = self._score_nb1
2533 self.loglikeobs = self._ll_nb1
2534 self._transparams = True # transform lnalpha -> alpha in fit
2535 elif self.loglike_method == 'geometric':
2536 self.hessian = self._hessian_geom
2537 self.score = self._score_geom
2538 self.loglikeobs = self._ll_geometric
2539 else:
2540 raise ValueError('Likelihood type must be "nb1", "nb2" '
2541 'or "geometric"')
2543 # Workaround to pickle instance methods
2544 def __getstate__(self):
2545 odict = self.__dict__.copy() # copy the dict since we change it
2546 del odict['hessian']
2547 del odict['score']
2548 del odict['loglikeobs']
2549 return odict
2551 def __setstate__(self, indict):
2552 self.__dict__.update(indict)
2553 self._initialize()
2555 def _ll_nbin(self, params, alpha, Q=0):
2556 if np.any(np.iscomplex(params)) or np.iscomplex(alpha):
2557 gamma_ln = loggamma
2558 else:
2559 gamma_ln = gammaln
2560 endog = self.endog
2561 mu = self.predict(params)
2562 size = 1/alpha * mu**Q
2563 prob = size/(size+mu)
2564 coeff = (gamma_ln(size+endog) - gamma_ln(endog+1) -
2565 gamma_ln(size))
2566 llf = coeff + size*np.log(prob) + endog*np.log(1-prob)
2567 return llf
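# Illustrative check (editor's sketch, not part of this module): the
# (size, prob) pair used above is scipy's negative binomial parameterization,
# so the per-observation log-likelihood matches scipy.stats.nbinom.logpmf.
import numpy as np
from scipy.special import gammaln
from scipy.stats import nbinom

y = np.array([0, 1, 3, 7])
mu = np.array([0.5, 1.2, 2.0, 4.0])
alpha, Q = 0.7, 0                       # NB2 case: size = 1/alpha
size = 1 / alpha * mu**Q
prob = size / (size + mu)
llf = (gammaln(size + y) - gammaln(y + 1) - gammaln(size)
       + size * np.log(prob) + y * np.log(1 - prob))
assert np.allclose(llf, nbinom.logpmf(y, size, prob))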
2569 def _ll_nb2(self, params):
2570 if self._transparams: # got lnalpha during fit
2571 alpha = np.exp(params[-1])
2572 else:
2573 alpha = params[-1]
2574 return self._ll_nbin(params[:-1], alpha, Q=0)
2576 def _ll_nb1(self, params):
2577 if self._transparams: # got lnalpha during fit
2578 alpha = np.exp(params[-1])
2579 else:
2580 alpha = params[-1]
2581 return self._ll_nbin(params[:-1], alpha, Q=1)
2583 def _ll_geometric(self, params):
2584 # geometric is the NB2 special case with alpha fixed at 1 (variance mu + mu**2)
2585 return self._ll_nbin(params, 1, 0)
2587 def loglike(self, params):
2588 r"""
2589 Loglikelihood for negative binomial model
2591 Parameters
2592 ----------
2593 params : array_like
2594 The parameters of the model. If `loglike_method` is nb1 or
2595 nb2, then the ancillary parameter is expected to be the
2596 last element.
2598 Returns
2599 -------
2600 llf : float
2601 The loglikelihood value at `params`
2603 Notes
2604 -----
2605 Following notation in Greene (2008), with negative binomial
2606 heterogeneity parameter :math:`\alpha`:
2608 .. math::
2610 \lambda_i &= \exp(X\beta) \\
2611 \theta &= 1 / \alpha \\
2612 g_i &= \theta \lambda_i^Q \\
2613 w_i &= g_i/(g_i + \lambda_i) \\
2614 r_i &= \theta / (\theta+\lambda_i) \\
2615 \ln \mathcal{L}_i &= \ln\Gamma(y_i+g_i) - \ln\Gamma(1+y_i) - \ln\Gamma(g_i) + g_i \ln(r_i) + y_i \ln(1-r_i)
2617 where :math:`Q=0` for NB2 and geometric and :math:`Q=1` for NB1.
2618 For the geometric, :math:`\alpha=1` (variance :math:`\mu + \mu^2`).
2619 """
2620 llf = np.sum(self.loglikeobs(params))
2621 return llf
2623 def _score_geom(self, params):
2624 exog = self.exog
2625 y = self.endog[:, None]
2626 mu = self.predict(params)[:, None]
2627 dparams = exog * (y-mu)/(mu+1)
2628 return dparams.sum(0)
2630 def _score_nbin(self, params, Q=0):
2631 """
2632 Score vector for NB2 model
2633 """
2634 if self._transparams: # lnalpha came in during fit
2635 alpha = np.exp(params[-1])
2636 else:
2637 alpha = params[-1]
2638 params = params[:-1]
2639 exog = self.exog
2640 y = self.endog[:,None]
2641 mu = self.predict(params)[:,None]
2642 a1 = 1/alpha * mu**Q
2643 prob = a1 / (a1 + mu) # a1 aka "size" in _ll_nbin
2644 if Q == 1: # nb1
2645 # Q == 1 --> a1 = mu / alpha --> prob = 1 / (alpha + 1)
2646 dgpart = digamma(y + a1) - digamma(a1)
2647 dparams = exog * a1 * (np.log(prob) +
2648 dgpart)
2649 dalpha = ((alpha * (y - mu * np.log(prob) -
2650 mu*(dgpart + 1)) -
2651 mu * (np.log(prob) +
2652 dgpart))/
2653 (alpha**2*(alpha + 1))).sum()
2655 elif Q == 0: # nb2
2656 dgpart = digamma(y + a1) - digamma(a1)
2657 dparams = exog*a1 * (y-mu)/(mu+a1)
2658 da1 = -alpha**-2
2659 dalpha = (dgpart + np.log(a1)
2660 - np.log(a1+mu) - (y-mu)/(a1+mu)).sum() * da1
2662 #multiply above by constant outside sum to reduce rounding error
2663 if self._transparams:
2664 return np.r_[dparams.sum(0), dalpha*alpha]
2665 else:
2666 return np.r_[dparams.sum(0), dalpha]
2668 def _score_nb1(self, params):
2669 return self._score_nbin(params, Q=1)
2671 def _hessian_geom(self, params):
2672 exog = self.exog
2673 y = self.endog[:,None]
2674 mu = self.predict(params)[:,None]
2676 # for dl/dparams dparams
2677 dim = exog.shape[1]
2678 hess_arr = np.empty((dim, dim))
2679 const_arr = mu*(1+y)/(mu+1)**2
2680 for i in range(dim):
2681 for j in range(dim):
2682 if j > i:
2683 continue
2684 hess_arr[i,j] = np.sum(-exog[:,i,None] * exog[:,j,None] *
2685 const_arr, axis=0)
2686 tri_idx = np.triu_indices(dim, k=1)
2687 hess_arr[tri_idx] = hess_arr.T[tri_idx]
2688 return hess_arr
2691 def _hessian_nb1(self, params):
2692 """
2693 Hessian of NB1 model.
2694 """
2695 if self._transparams: # lnalpha came in during fit
2696 alpha = np.exp(params[-1])
2697 else:
2698 alpha = params[-1]
2700 params = params[:-1]
2701 exog = self.exog
2702 y = self.endog[:,None]
2703 mu = self.predict(params)[:,None]
2705 a1 = mu/alpha
2706 dgpart = digamma(y + a1) - digamma(a1)
2707 prob = 1 / (1 + alpha) # equiv: a1 / (a1 + mu)
2709 # for dl/dparams dparams
2710 dim = exog.shape[1]
2711 hess_arr = np.empty((dim+1,dim+1))
2712 #const_arr = a1*mu*(a1+y)/(mu+a1)**2
2713 # not all of dparams
2714 dparams = exog / alpha * (np.log(prob) +
2715 dgpart)
2717 dmudb = exog*mu
2718 xmu_alpha = exog * a1
2719 trigamma = (special.polygamma(1, a1 + y) -
2720 special.polygamma(1, a1))
2721 for i in range(dim):
2722 for j in range(dim):
2723 if j > i:
2724 continue
2725 hess_arr[i,j] = np.sum(dparams[:,i,None] * dmudb[:,j,None] +
2726 xmu_alpha[:,i,None] * xmu_alpha[:,j,None] *
2727 trigamma, axis=0)
2728 tri_idx = np.triu_indices(dim, k=1)
2729 hess_arr[tri_idx] = hess_arr.T[tri_idx]
2731 # for dl/dparams dalpha
2732 da1 = -alpha**-2
2733 dldpda = np.sum(-a1 * dparams + exog * a1 *
2734 (-trigamma*mu/alpha**2 - prob), axis=0)
2736 hess_arr[-1,:-1] = dldpda
2737 hess_arr[:-1,-1] = dldpda
2739 log_alpha = np.log(prob)
2740 alpha3 = alpha**3
2741 alpha2 = alpha**2
2742 mu2 = mu**2
2743 dada = ((alpha3*mu*(2*log_alpha + 2*dgpart + 3) -
2744 2*alpha3*y +
2745 4*alpha2*mu*(log_alpha + dgpart) +
2746 alpha2 * (2*mu - y) +
2747 2*alpha*mu2*trigamma + mu2 * trigamma + alpha2 * mu2 * trigamma +
2748 2*alpha*mu*(log_alpha + dgpart)
2749 )/(alpha**4*(alpha2 + 2*alpha + 1)))
2750 hess_arr[-1,-1] = dada.sum()
2752 return hess_arr
2754 def _hessian_nb2(self, params):
2755 """
2756 Hessian of NB2 model.
2757 """
2758 if self._transparams: # lnalpha came in during fit
2759 alpha = np.exp(params[-1])
2760 else:
2761 alpha = params[-1]
2762 a1 = 1/alpha
2763 params = params[:-1]
2765 exog = self.exog
2766 y = self.endog[:,None]
2767 mu = self.predict(params)[:,None]
2768 prob = a1 / (a1 + mu)
2769 dgpart = digamma(a1 + y) - digamma(a1)
2771 # for dl/dparams dparams
2772 dim = exog.shape[1]
2773 hess_arr = np.empty((dim+1,dim+1))
2774 const_arr = a1*mu*(a1+y)/(mu+a1)**2
2775 for i in range(dim):
2776 for j in range(dim):
2777 if j > i:
2778 continue
2779 hess_arr[i,j] = np.sum(-exog[:,i,None] * exog[:,j,None] *
2780 const_arr, axis=0)
2781 tri_idx = np.triu_indices(dim, k=1)
2782 hess_arr[tri_idx] = hess_arr.T[tri_idx]
2784 # for dl/dparams dalpha
2785 da1 = -alpha**-2
2786 dldpda = -np.sum(mu*exog*(y-mu)*a1**2/(mu+a1)**2 , axis=0)
2787 hess_arr[-1,:-1] = dldpda
2788 hess_arr[:-1,-1] = dldpda
2790 # for dl/dalpha dalpha
2791 #NOTE: polygamma(1,x) is the trigamma function
2792 da2 = 2*alpha**-3
2793 dalpha = da1 * (dgpart +
2794 np.log(prob) - (y - mu)/(a1+mu))
2795 dada = (da2 * dalpha/da1 + da1**2 * (special.polygamma(1, a1+y) -
2796 special.polygamma(1, a1) + 1/a1 - 1/(a1 + mu) +
2797 (y - mu)/(mu + a1)**2)).sum()
2798 hess_arr[-1,-1] = dada
2800 return hess_arr
2802 #TODO: replace this with the analytic Jacobian; where is it used?
2803 def score_obs(self, params):
2804 sc = approx_fprime_cs(params, self.loglikeobs)
2805 return sc
2807 @Appender(_get_start_params_null_docs)
2808 def _get_start_params_null(self):
2809 offset = getattr(self, "offset", 0)
2810 exposure = getattr(self, "exposure", 0)
2811 const = (self.endog / np.exp(offset + exposure)).mean()
2812 params = [np.log(const)]
2813 mu = const * np.exp(offset + exposure)
2814 resid = self.endog - mu
2815 a = self._estimate_dispersion(mu, resid, df_resid=resid.shape[0] - 1)
2816 params.append(a)
2817 return np.array(params)
2819 def _estimate_dispersion(self, mu, resid, df_resid=None):
2820 if df_resid is None:
2821 df_resid = resid.shape[0]
2822 if self.loglike_method == 'nb2':
2823 #params.append(np.linalg.pinv(mu[:,None]).dot(resid**2 / mu - 1))
2824 a = ((resid**2 / mu - 1) / mu).sum() / df_resid
2825 else: #self.loglike_method == 'nb1':
2826 a = (resid**2 / mu - 1).sum() / df_resid
2827 return a
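# Sketch of the moment estimator above (editor's illustration, not part of
# this module): under NB2, Var(y) = mu + alpha*mu**2, so
# ((y - mu)**2 / mu - 1) / mu has expectation alpha and its average gives a
# quick dispersion estimate.
import numpy as np

rng = np.random.default_rng(2)
mu, alpha = 3.0, 0.5
n_nb = 1 / alpha                          # scipy/numpy "size" parameter
p_nb = n_nb / (n_nb + mu)                 # success probability
y = rng.negative_binomial(n_nb, p_nb, size=100_000)
resid = y - mu
alpha_hat = ((resid**2 / mu - 1) / mu).mean()
print(round(alpha_hat, 2))                # close to the true alpha of 0.5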
2829 def fit(self, start_params=None, method='bfgs', maxiter=35,
2830 full_output=1, disp=1, callback=None,
2831 cov_type='nonrobust', cov_kwds=None, use_t=None, **kwargs):
2833 # Note: do not let super handle robust covariance because it has
2834 # transformed params
2835 self._transparams = False # always define attribute
2836 if self.loglike_method.startswith('nb') and method not in ['newton',
2837 'ncg']:
2838 self._transparams = True # in case same Model instance is refit
2839 elif self.loglike_method.startswith('nb'): # method is newton/ncg
2840 self._transparams = False # because we need to step in alpha space
2842 if start_params is None:
2843 # Use poisson fit as first guess.
2844 #TODO, Warning: this assumes exposure is logged
2845 offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
2846 if np.size(offset) == 1 and offset == 0:
2847 offset = None
2848 optim_kwds_prelim = {'disp': 0, 'skip_hessian': True,
2849 'warn_convergence': False}
2850 optim_kwds_prelim.update(kwargs.get('optim_kwds_prelim', {}))
2851 mod_poi = Poisson(self.endog, self.exog, offset=offset)
2852 res_poi = mod_poi.fit(**optim_kwds_prelim)
2853 start_params = res_poi.params
2854 if self.loglike_method.startswith('nb'):
2855 a = self._estimate_dispersion(res_poi.predict(), res_poi.resid,
2856 df_resid=res_poi.df_resid)
2857 start_params = np.append(start_params, max(0.05, a))
2858 else:
2859 if self._transparams is True:
2860 # transform user provided start_params dispersion, see #3918
2861 start_params = np.array(start_params, copy=True)
2862 start_params[-1] = np.log(start_params[-1])
2864 if callback is None:
2865 # work around perfect separation callback #3895
2866 callback = lambda *x: x
2868 mlefit = super(NegativeBinomial, self).fit(start_params=start_params,
2869 maxiter=maxiter, method=method, disp=disp,
2870 full_output=full_output, callback=callback,
2871 **kwargs)
2872 # TODO: Fix NBin _check_perfect_pred
2873 if self.loglike_method.startswith('nb'):
2874 # mlefit is a wrapped counts results
2875 self._transparams = False # do not need to transform anymore now
2876 # change from lnalpha to alpha
2877 if method not in ["newton", "ncg"]:
2878 mlefit._results.params[-1] = np.exp(mlefit._results.params[-1])
2880 nbinfit = NegativeBinomialResults(self, mlefit._results)
2881 result = NegativeBinomialResultsWrapper(nbinfit)
2882 else:
2883 result = mlefit
2885 if cov_kwds is None:
2886 cov_kwds = {} #TODO: make this unnecessary ?
2887 result._get_robustcov_results(cov_type=cov_type,
2888 use_self=True, use_t=use_t, **cov_kwds)
2889 return result
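# Minimal usage sketch (editor's illustration, not part of this module;
# assumes statsmodels is installed). randhie is an example count-data
# dataset shipped with statsmodels.
import statsmodels.api as sm

rand = sm.datasets.randhie.load_pandas()
exog = sm.add_constant(rand.exog)
res = sm.NegativeBinomial(rand.endog, exog, loglike_method='nb2').fit(disp=0)
print(res.params['alpha'])    # estimated dispersion parameter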
2892 def fit_regularized(self, start_params=None, method='l1',
2893 maxiter='defined_by_method', full_output=1, disp=1, callback=None,
2894 alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
2895 qc_tol=0.03, **kwargs):
2897 _validate_l1_method(method)
2899 if self.loglike_method.startswith('nb') and (np.size(alpha) == 1 and
2900 alpha != 0):
2901 # do not penalize alpha if alpha is scalar
2902 k_params = self.exog.shape[1] + self.k_extra
2903 alpha = alpha * np.ones(k_params)
2904 alpha[-1] = 0
2906 # alpha for regularized poisson to get starting values
2907 alpha_p = alpha[:-1] if (self.k_extra and np.size(alpha) > 1) else alpha
2909 self._transparams = False
2910 if start_params is None:
2911 # Use poisson fit as first guess.
2912 #TODO, Warning: this assumes exposure is logged
2913 offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
2914 if np.size(offset) == 1 and offset == 0:
2915 offset = None
2916 mod_poi = Poisson(self.endog, self.exog, offset=offset)
2917 start_params = mod_poi.fit_regularized(
2918 start_params=start_params, method=method, maxiter=maxiter,
2919 full_output=full_output, disp=0, callback=callback,
2920 alpha=alpha_p, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
2921 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs).params
2922 if self.loglike_method.startswith('nb'):
2923 start_params = np.append(start_params, 0.1)
2925 cntfit = super(CountModel, self).fit_regularized(
2926 start_params=start_params, method=method, maxiter=maxiter,
2927 full_output=full_output, disp=disp, callback=callback,
2928 alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
2929 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)
2931 discretefit = L1NegativeBinomialResults(self, cntfit)
2932 return L1NegativeBinomialResultsWrapper(discretefit)
2935class NegativeBinomialP(CountModel):
2936 __doc__ = """
2937 Generalized Negative Binomial (NB-P) Model
2939 %(params)s
2940 %(extra_params)s
2942 Attributes
2943 ----------
2944 endog : ndarray
2945 A reference to the endogenous response variable
2946 exog : ndarray
2947 A reference to the exogenous design.
2948 p : scalar
2949 P denotes parameterizations for NB-P regression. p=1 for NB-1 and
2950 p=2 for NB-2. Default is p=2.
2951 """ % {'params' : base._model_params_doc,
2952 'extra_params' :
2953 """p : scalar
2954 P denotes parameterizations for NB regression. p=1 for NB-1 and
2955 p=2 for NB-2. Default is p=2.
2956 offset : array_like
2957 Offset is added to the linear prediction with coefficient equal to 1.
2958 exposure : array_like
2959 Log(exposure) is added to the linear prediction with coefficient
2960 equal to 1.
2961 """ + base._missing_param_doc}
2963 def __init__(self, endog, exog, p=2, offset=None,
2964 exposure=None, missing='none', **kwargs):
2965 super(NegativeBinomialP, self).__init__(endog, exog, offset=offset,
2966 exposure=exposure,
2967 missing=missing, **kwargs)
2968 self.parameterization = p
2969 self.exog_names.append('alpha')
2970 self.k_extra = 1
2971 self._transparams = False
2973 def _get_init_kwds(self):
2974 kwds = super(NegativeBinomialP, self)._get_init_kwds()
2975 kwds['p'] = self.parameterization
2976 return kwds
2978 def loglike(self, params):
2979 """
2980 Loglikelihood of Generalized Negative Binomial (NB-P) model
2982 Parameters
2983 ----------
2984 params : array_like
2985 The parameters of the model.
2987 Returns
2988 -------
2989 loglike : float
2990 The log-likelihood function of the model evaluated at `params`.
2991 See notes.
2992 """
2993 return np.sum(self.loglikeobs(params))
2995 def loglikeobs(self, params):
2996 """
2997 Loglikelihood for observations of Generalized Negative Binomial (NB-P) model
2999 Parameters
3000 ----------
3001 params : array_like
3002 The parameters of the model.
3004 Returns
3005 -------
3006 loglike : ndarray
3007 The log likelihood for each observation of the model evaluated
3008 at `params`. See Notes
3009 """
3010 if self._transparams:
3011 alpha = np.exp(params[-1])
3012 else:
3013 alpha = params[-1]
3015 params = params[:-1]
3016 p = self.parameterization
3017 y = self.endog
3019 mu = self.predict(params)
3020 mu_p = mu**(2 - p)
3021 a1 = mu_p / alpha
3022 a2 = mu + a1
3024 llf = (gammaln(y + a1) - gammaln(y + 1) - gammaln(a1) +
3025 a1 * np.log(a1) + y * np.log(mu) -
3026 (y + a1) * np.log(a2))
3028 return llf
3030 def score_obs(self, params):
3031 """
3032 Generalized Negative Binomial (NB-P) model score (gradient) vector of the log-likelihood for each observation.
3034 Parameters
3035 ----------
3036 params : array_like
3037 The parameters of the model
3039 Returns
3040 -------
3041 score : ndarray, 1-D
3042 The score vector of the model, i.e. the first derivative of the
3043 loglikelihood function, evaluated at `params`
3044 """
3045 if self._transparams:
3046 alpha = np.exp(params[-1])
3047 else:
3048 alpha = params[-1]
3050 params = params[:-1]
3051 p = 2 - self.parameterization
3052 y = self.endog
3054 mu = self.predict(params)
3055 mu_p = mu**p
3056 a1 = mu_p / alpha
3057 a2 = mu + a1
3058 a3 = y + a1
3059 a4 = p * a1 / mu
3061 dgpart = digamma(a3) - digamma(a1)
3062 dgterm = dgpart + np.log(a1 / a2) + 1 - a3 / a2
3063 # TODO: better name/interpretation for dgterm?
3065 dparams = (a4 * dgterm -
3066 a3 / a2 +
3067 y / mu)
3068 dparams = (self.exog.T * mu * dparams).T
3069 dalpha = -a1 / alpha * dgterm
3071 return np.concatenate((dparams, np.atleast_2d(dalpha).T),
3072 axis=1)
3074 def score(self, params):
3075 """
3076 Generalized Negative Binomial (NB-P) model score (gradient) vector of the log-likelihood
3078 Parameters
3079 ----------
3080 params : array_like
3081 The parameters of the model
3083 Returns
3084 -------
3085 score : ndarray, 1-D
3086 The score vector of the model, i.e. the first derivative of the
3087 loglikelihood function, evaluated at `params`
3088 """
3089 score = np.sum(self.score_obs(params), axis=0)
3090 if self._transparams:
3091 score[-1] *= np.exp(params[-1])  # chain rule: d/d(lnalpha) = alpha * d/dalpha
3092 return score
3093 else:
3094 return score
3096 def hessian(self, params):
3097 """
3098 Generalized Negative Binomial (NB-P) model Hessian matrix of the log-likelihood
3100 Parameters
3101 ----------
3102 params : array_like
3103 The parameters of the model
3105 Returns
3106 -------
3107 hessian : ndarray, 2-D
3108 The hessian matrix of the model.
3109 """
3110 if self._transparams:
3111 alpha = np.exp(params[-1])
3112 else:
3113 alpha = params[-1]
3114 params = params[:-1]
3116 p = 2 - self.parameterization
3117 y = self.endog
3118 exog = self.exog
3119 mu = self.predict(params)
3121 mu_p = mu**p
3122 a1 = mu_p / alpha
3123 a2 = mu + a1
3124 a3 = y + a1
3125 a4 = p * a1 / mu
3127 prob = a1 / a2
3128 lprob = np.log(prob)
3129 dgpart = digamma(a3) - digamma(a1)
3130 pgpart = polygamma(1, a3) - polygamma(1, a1)
3132 dim = exog.shape[1]
3133 hess_arr = np.zeros((dim + 1, dim + 1))
3135 coeff = mu**2 * (((1 + a4)**2 * a3 / a2**2 -
3136 a3 / a2 * (p - 1) * a4 / mu -
3137 y / mu**2 -
3138 2 * a4 * (1 + a4) / a2 +
3139 p * a4 / mu * (lprob + dgpart + 2) -
3140 a4 / mu * (lprob + dgpart + 1) +
3141 a4**2 * pgpart) +
3142 (-(1 + a4) * a3 / a2 +
3143 y / mu +
3144 a4 * (lprob + dgpart + 1)) / mu)
3146 for i in range(dim):
3147 hess_arr[i, :-1] = np.sum(self.exog[:, :].T * self.exog[:, i] * coeff, axis=1)
3150 hess_arr[-1,:-1] = (self.exog[:, :].T * mu * a1 *
3151 ((1 + a4) * (1 - a3 / a2) / a2 -
3152 p * (lprob + dgpart + 2) / mu +
3153 p / mu * (a3 + p * a1) / a2 -
3154 a4 * pgpart) / alpha).sum(axis=1)
3157 da2 = (a1 * (2 * lprob +
3158 2 * dgpart + 3 -
3159 2 * a3 / a2
3160 + a1 * pgpart
3161 - 2 * prob +
3162 prob * a3 / a2) / alpha**2)
3164 hess_arr[-1, -1] = da2.sum()
3166 tri_idx = np.triu_indices(dim + 1, k=1)
3167 hess_arr[tri_idx] = hess_arr.T[tri_idx]
3169 return hess_arr
3171 @Appender(_get_start_params_null_docs)
3172 def _get_start_params_null(self):
3173 offset = getattr(self, "offset", 0)
3174 exposure = getattr(self, "exposure", 0)
3175 q = self.parameterization - 1
3177 const = (self.endog / np.exp(offset + exposure)).mean()
3178 params = [np.log(const)]
3179 mu = const * np.exp(offset + exposure)
3180 resid = self.endog - mu
3181 a = self._estimate_dispersion(mu, resid, df_resid=resid.shape[0] - 1)
3182 params.append(a)
3184 return np.array(params)
3186 def _estimate_dispersion(self, mu, resid, df_resid=None):
3187 q = self.parameterization - 1
3188 if df_resid is None:
3189 df_resid = resid.shape[0]
3190 a = ((resid**2 / mu - 1) * mu**(-q)).sum() / df_resid
3191 return a
3193 @Appender(DiscreteModel.fit.__doc__)
3194 def fit(self, start_params=None, method='bfgs', maxiter=35,
3195 full_output=1, disp=1, callback=None, use_transparams=False,
3196 cov_type='nonrobust', cov_kwds=None, use_t=None, **kwargs):
3197 # TODO: Fix doc string
3198 """
3199 use_transparams : bool
3200 This parameter enables an internal transformation to impose
3201 non-negativity of the dispersion. True to enable. Default is False.
3202 use_transparams=True imposes the constraint alpha > 0 (no
3203 underdispersion). If use_transparams=True and method is "newton" or
3204 "ncg", the transformation is ignored.
3205 """
3206 if use_transparams and method not in ['newton', 'ncg']:
3207 self._transparams = True
3208 else:
3209 if use_transparams:
3210 warnings.warn('Parameter "use_transparams" is ignored',
3211 RuntimeWarning)
3212 self._transparams = False
3214 if start_params is None:
3215 offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
3216 if np.size(offset) == 1 and offset == 0:
3217 offset = None
3219 optim_kwds_prelim = {'disp': 0, 'skip_hessian': True,
3220 'warn_convergence': False}
3221 optim_kwds_prelim.update(kwargs.get('optim_kwds_prelim', {}))
3222 mod_poi = Poisson(self.endog, self.exog, offset=offset)
3223 res_poi = mod_poi.fit(**optim_kwds_prelim)
3224 start_params = res_poi.params
3225 a = self._estimate_dispersion(res_poi.predict(), res_poi.resid,
3226 df_resid=res_poi.df_resid)
3227 start_params = np.append(start_params, max(0.05, a))
3229 if callback is None:
3230 # work around perfect separation callback #3895
3231 callback = lambda *x: x
3233 mlefit = super(NegativeBinomialP, self).fit(start_params=start_params,
3234 maxiter=maxiter, method=method, disp=disp,
3235 full_output=full_output, callback=callback,
3236 **kwargs)
3238 if use_transparams and method not in ["newton", "ncg"]:
3239 self._transparams = False
3240 mlefit._results.params[-1] = np.exp(mlefit._results.params[-1])
3242 nbinfit = NegativeBinomialResults(self, mlefit._results)
3243 result = NegativeBinomialResultsWrapper(nbinfit)
3245 if cov_kwds is None:
3246 cov_kwds = {}
3247 result._get_robustcov_results(cov_type=cov_type,
3248 use_self=True, use_t=use_t, **cov_kwds)
3249 return result
3251 @Appender(DiscreteModel.fit_regularized.__doc__)
3252 def fit_regularized(self, start_params=None, method='l1',
3253 maxiter='defined_by_method', full_output=1, disp=1, callback=None,
3254 alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
3255 qc_tol=0.03, **kwargs):
3257 _validate_l1_method(method)
3259 if np.size(alpha) == 1 and alpha != 0:
3260 k_params = self.exog.shape[1] + self.k_extra
3261 alpha = alpha * np.ones(k_params)
3262 alpha[-1] = 0
3264 alpha_p = alpha[:-1] if (self.k_extra and np.size(alpha) > 1) else alpha
3266 self._transparams = False
3267 if start_params is None:
3268 offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
3269 if np.size(offset) == 1 and offset == 0:
3270 offset = None
3271 mod_poi = Poisson(self.endog, self.exog, offset=offset)
3272 start_params = mod_poi.fit_regularized(
3273 start_params=start_params, method=method, maxiter=maxiter,
3274 full_output=full_output, disp=0, callback=callback,
3275 alpha=alpha_p, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
3276 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs).params
3277 start_params = np.append(start_params, 0.1)
3279 cntfit = super(CountModel, self).fit_regularized(
3280 start_params=start_params, method=method, maxiter=maxiter,
3281 full_output=full_output, disp=disp, callback=callback,
3282 alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
3283 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)
3285 discretefit = L1NegativeBinomialResults(self, cntfit)
3287 return L1NegativeBinomialResultsWrapper(discretefit)
3289 def predict(self, params, exog=None, exposure=None, offset=None,
3290 which='mean'):
3291 """
3292 Predict response variable of a model given exogenous variables.
3294 Parameters
3295 ----------
3296 params : array_like
3297 2d array of fitted parameters of the model. Should be in the
3298 order returned from the model.
3299 exog : array_like, optional
3300 1d or 2d array of exogenous values. If not supplied, the
3301 whole exog attribute of the model is used. If a 1d array is given
3302 it is assumed to be 1 row of exogenous variables. If you only have
3303 one regressor and would like to do prediction, you must provide
3304 a 2d array with shape[1] == 1.
3308 offset : array_like, optional
3309 Offset is added to the linear prediction with coefficient equal to 1.
3310 exposure : array_like, optional
3311 Log(exposure) is added to the linear prediction with coefficient
3312 equal to 1.
3313 which : 'mean', 'linear', 'prob', optional.
3314 'mean' returns the exp of linear predictor exp(dot(exog,params)).
3315 'linear' returns the linear predictor dot(exog,params).
3316 'prob' returns probabilities for counts from 0 to max(endog).
3317 Default is 'mean'.
3319 Notes
3320 -----
3321 """
3322 if exog is None:
3323 exog = self.exog
3325 if exposure is None:
3326 exposure = getattr(self, 'exposure', 0)
3327 elif exposure != 0:
3328 exposure = np.log(exposure)
3330 if offset is None:
3331 offset = getattr(self, 'offset', 0)
3333 fitted = np.dot(exog, params[:exog.shape[1]])
3334 linpred = fitted + exposure + offset
3336 if which == 'mean':
3337 return np.exp(linpred)
3338 elif which == 'linear':
3339 return linpred
3340 elif which =='prob':
3341 counts = np.atleast_2d(np.arange(0, np.max(self.endog)+1))
3342 mu = self.predict(params, exog, exposure, offset)
3343 size, prob = self.convert_params(params, mu)
3344 return nbinom.pmf(counts, size[:,None], prob[:,None])
3345 else:
3346 raise ValueError('keyword "which" = %s not recognized' % which)
3348 def convert_params(self, params, mu):
3349 alpha = params[-1]
3350 p = 2 - self.parameterization
3352 size = 1. / alpha * mu**p
3353 prob = size / (size + mu)
3355 return (size, prob)
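# Sketch of the conversion above (editor's illustration, not part of this
# module): for NB-P with mean mu and dispersion alpha, size = mu**(2 - p) / alpha
# and prob = size / (size + mu) give scipy's negative binomial
# parameterization, so count probabilities come from scipy.stats.nbinom.pmf.
import numpy as np
from scipy.stats import nbinom

mu, alpha, p = 2.5, 0.4, 2        # p = 2 is the NB2 case
size = mu**(2 - p) / alpha
prob = size / (size + mu)
counts = np.arange(6)
print(nbinom.pmf(counts, size, prob).round(3))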
3358### Results Class ###
3360class DiscreteResults(base.LikelihoodModelResults):
3361 __doc__ = _discrete_results_docs % {"one_line_description" :
3362 "A results class for the discrete dependent variable models.",
3363 "extra_attr" : ""}
3365 def __init__(self, model, mlefit, cov_type='nonrobust', cov_kwds=None,
3366 use_t=None):
3367 #super(DiscreteResults, self).__init__(model, params,
3368 # np.linalg.inv(-hessian), scale=1.)
3369 self.model = model
3370 self.df_model = model.df_model
3371 self.df_resid = model.df_resid
3372 self._cache = {}
3373 self.nobs = model.exog.shape[0]
3374 self.__dict__.update(mlefit.__dict__)
3376 if not hasattr(self, 'cov_type'):
3377 # do this only if super, i.e. mlefit did not already add cov_type
3378 # robust covariance
3379 if use_t is not None:
3380 self.use_t = use_t
3381 if cov_type == 'nonrobust':
3382 self.cov_type = 'nonrobust'
3383 self.cov_kwds = {'description' : 'Standard Errors assume that the ' +
3384 'covariance matrix of the errors is correctly ' +
3385 'specified.'}
3386 else:
3387 if cov_kwds is None:
3388 cov_kwds = {}
3389 from statsmodels.base.covtype import get_robustcov_results
3390 get_robustcov_results(self, cov_type=cov_type, use_self=True,
3391 **cov_kwds)
3394 def __getstate__(self):
3395 # remove unpicklable methods
3396 mle_settings = getattr(self, 'mle_settings', None)
3397 if mle_settings is not None:
3398 if 'callback' in mle_settings:
3399 mle_settings['callback'] = None
3400 if 'cov_params_func' in mle_settings:
3401 mle_settings['cov_params_func'] = None
3402 return self.__dict__
3404 @cache_readonly
3405 def prsquared(self):
3406 """
3407 McFadden's pseudo-R-squared. `1 - (llf / llnull)`
3408 """
3409 return 1 - self.llf/self.llnull
3411 @cache_readonly
3412 def llr(self):
3413 """
3414 Likelihood ratio chi-squared statistic; `-2*(llnull - llf)`
3415 """
3416 return -2*(self.llnull - self.llf)
3418 @cache_readonly
3419 def llr_pvalue(self):
3420 """
3421 The chi-squared probability of getting a log-likelihood ratio
3422 statistic greater than llr. llr has a chi-squared distribution
3423 with degrees of freedom `df_model`.
3424 """
3425 return stats.distributions.chi2.sf(self.llr, self.df_model)
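# Illustrative computation (editor's sketch with made-up numbers, not part of
# this module): the three statistics above need only llf, llnull and df_model.
from scipy import stats

llf, llnull, df_model = -120.3, -135.9, 3      # hypothetical values
prsquared = 1 - llf / llnull                   # McFadden's pseudo R-squared
llr = -2 * (llnull - llf)                      # likelihood-ratio statistic
llr_pvalue = stats.chi2.sf(llr, df_model)      # chi-squared tail probability
print(prsquared, llr, llr_pvalue)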
3427 def set_null_options(self, llnull=None, attach_results=True, **kwargs):
3428 """
3429 Set the fit options for the Null (constant-only) model.
3431 This resets the cache for related attributes which is potentially
3432 fragile. This only sets the option, the null model is estimated
3433 when llnull is accessed, if llnull is not yet in cache.
3435 Parameters
3436 ----------
3437 llnull : {None, float}
3438 If llnull is not None, then the value will be directly assigned to
3439 the cached attribute "llnull".
3440 attach_results : bool
3441 Sets an internal flag whether the results instance of the null
3442 model should be attached. By default without calling this method,
3443 the null model results are not attached and only the loglikelihood
3444 value llnull is stored.
3445 **kwargs
3446 Additional keyword arguments used as fit keyword arguments for the
3447 null model. These override the model default values.
3449 Notes
3450 -----
3451 Modifies attributes of this instance, and so has no return.
3452 """
3453 # reset cache, note we need to add here anything that depends on
3454 # llnull or the null model. If something is missing, then the attribute
3455 # might be incorrect.
3456 self._cache.pop('llnull', None)
3457 self._cache.pop('llr', None)
3458 self._cache.pop('llr_pvalue', None)
3459 self._cache.pop('prsquared', None)
3460 if hasattr(self, 'res_null'):
3461 del self.res_null
3463 if llnull is not None:
3464 self._cache['llnull'] = llnull
3465 self._attach_nullmodel = attach_results
3466 self._optim_kwds_null = kwargs
3468 @cache_readonly
3469 def llnull(self):
3470 """
3471 Value of the constant-only loglikelihood
3472 """
3473 model = self.model
3474 kwds = model._get_init_kwds().copy()
3475 for key in getattr(model, '_null_drop_keys', []):
3476 del kwds[key]
3477 # TODO: what parameters to pass to fit?
3478 mod_null = model.__class__(model.endog, np.ones(self.nobs), **kwds)
3479 # TODO: consider catching and warning on convergence failure?
3480 # in the meantime, try hard to converge. see
3481 # TestPoissonConstrained1a.test_smoke
3483 optim_kwds = getattr(self, '_optim_kwds_null', {}).copy()
3485 if 'start_params' in optim_kwds:
3486 # user provided
3487 sp_null = optim_kwds.pop('start_params')
3488 elif hasattr(model, '_get_start_params_null'):
3489 # get moment estimates if available
3490 sp_null = model._get_start_params_null()
3491 else:
3492 sp_null = None
3494 opt_kwds = dict(method='bfgs', warn_convergence=False, maxiter=10000,
3495 disp=0)
3496 opt_kwds.update(optim_kwds)
3498 if optim_kwds:
3499 res_null = mod_null.fit(start_params=sp_null, **opt_kwds)
3500 else:
3501 # this should be a reasonably robust method choice across versions
3502 res_null = mod_null.fit(start_params=sp_null, method='nm',
3503 warn_convergence=False,
3504 maxiter=10000, disp=0)
3505 res_null = mod_null.fit(start_params=res_null.params, method='bfgs',
3506 warn_convergence=False,
3507 maxiter=10000, disp=0)
3509 if getattr(self, '_attach_nullmodel', False) is not False:
3510 self.res_null = res_null
3512 return res_null.llf
3514 @cache_readonly
3515 def fittedvalues(self):
3516 """
3517 Linear predictor XB.
3518 """
3519 return np.dot(self.model.exog, self.params[:self.model.exog.shape[1]])
3521 @cache_readonly
3522 def resid_response(self):
3523 """
3524 Response residuals. The response residuals are defined as
3525 `endog - fittedvalues`
3526 """
3527 return self.model.endog - self.predict()
3529 @cache_readonly
3530 def aic(self):
3531 """
3532 Akaike information criterion. `-2*(llf - p)` where `p` is the number
3533 of regressors including the intercept.
3534 """
3535 return -2*(self.llf - (self.df_model+1))
3537 @cache_readonly
3538 def bic(self):
3539 """
3540 Bayesian information criterion. `-2*llf + ln(nobs)*p` where `p` is the
3541 number of regressors including the intercept.
3542 """
3543 return -2*self.llf + np.log(self.nobs)*(self.df_model+1)
3545 def _get_endog_name(self, yname, yname_list):
3546 if yname is None:
3547 yname = self.model.endog_names
3548 if yname_list is None:
3549 yname_list = self.model.endog_names
3550 return yname, yname_list
3552 def get_margeff(self, at='overall', method='dydx', atexog=None,
3553 dummy=False, count=False):
3554 """Get marginal effects of the fitted model.
3556 Parameters
3557 ----------
3558 at : str, optional
3559 Options are:
3561 - 'overall', The average of the marginal effects at each
3562 observation.
3563 - 'mean', The marginal effects at the mean of each regressor.
3564 - 'median', The marginal effects at the median of each regressor.
3565 - 'zero', The marginal effects at zero for each regressor.
3566 - 'all', The marginal effects at each observation. If `at` is all
3567 only margeff will be available from the returned object.
3569 Note that if `exog` is specified, then marginal effects for all
3570 variables not specified by `exog` are calculated using the `at`
3571 option.
3572 method : str, optional
3573 Options are:
3575 - 'dydx' - dy/dx - No transformation is made and marginal effects
3576 are returned. This is the default.
3577 - 'eyex' - estimate elasticities of variables in `exog` --
3578 d(lny)/d(lnx)
3579 - 'dyex' - estimate semi-elasticity -- dy/d(lnx)
3580 - 'eydx' - estimate semi-elasticity -- d(lny)/dx
3582 Note that transformations are done after each observation is
3583 calculated. Semi-elasticities for binary variables are computed
3584 using the midpoint method. 'dyex' and 'eyex' do not make sense
3585 for discrete variables. For interpretations of these methods
3586 see notes below.
3587 atexog : array_like, optional
3588 Optionally, you can provide the exogenous variables over which to
3589 get the marginal effects. This should be a dictionary with the
3590 zero-indexed column number as the key and the value at which to fix that variable as the dictionary value.
3591 Default is None for all independent variables less the constant.
3592 dummy : bool, optional
3593 If False, treats binary variables (if present) as continuous. This
3594 is the default. Else if True, treats binary variables as
3595 changing from 0 to 1. Note that any variable that is either 0 or 1
3596 is treated as binary. Each binary variable is treated separately
3597 for now.
3598 count : bool, optional
3599 If False, treats count variables (if present) as continuous. This
3600 is the default. Else if True, the marginal effect is the
3601 change in probabilities when each observation is increased by one.
3603 Returns
3604 -------
3605 DiscreteMargins : marginal effects instance
3606 Returns an object that holds the marginal effects, standard
3607 errors, confidence intervals, etc. See
3608 `statsmodels.discrete.discrete_margins.DiscreteMargins` for more
3609 information.
3611 Notes
3612 -----
3613 Interpretations of methods:
3615 - 'dydx' - change in `endog` for a change in `exog`.
3616 - 'eyex' - proportional change in `endog` for a proportional change
3617 in `exog`.
3618 - 'dyex' - change in `endog` for a proportional change in `exog`.
3619 - 'eydx' - proportional change in `endog` for a change in `exog`.
3621 When using after Poisson, returns the expected number of events per
3622 period, assuming that the model is loglinear.
3623 """
3624 from statsmodels.discrete.discrete_margins import DiscreteMargins
3625 return DiscreteMargins(self, (at, method, atexog, dummy, count))
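# Illustrative usage sketch, not part of the statsmodels source: a typical
# get_margeff call after fitting a binary model; data and names are invented.
#
#     import numpy as np
#     import statsmodels.api as sm
#
#     rng = np.random.RandomState(1)
#     exog = sm.add_constant(rng.normal(size=(500, 2)))
#     endog = (exog @ [0.2, 1.0, -0.5] + rng.logistic(size=500) > 0).astype(float)
#     res = sm.Logit(endog, exog).fit(disp=0)
#     mfx = res.get_margeff(at='overall', method='dydx')  # average marginal effects
#     print(mfx.summary())   # dy/dx, standard errors, z-stats, confidence intervals
#     ame = mfx.margeff      # the marginal effects themselves (see docstring above)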
3627 def summary(self, yname=None, xname=None, title=None, alpha=.05,
3628 yname_list=None):
3629 """
3630 Summarize the Regression Results.
3632 Parameters
3633 ----------
3634 yname : str, optional
3635 The name of the endog variable in the tables. The default is `y`.
3636 xname : list[str], optional
3637 The names for the exogenous variables, default is "var_xx".
3638 Must match the number of parameters in the model.
3639 title : str, optional
3640 Title for the top table. If not None, then this replaces the
3641 default title.
3642 alpha : float
3643 The significance level for the confidence intervals.
3645 Returns
3646 -------
3647 Summary
3648 Class that holds the summary tables and text, which can be printed
3649 or converted to various output formats.
3651 See Also
3652 --------
3653 statsmodels.iolib.summary.Summary : Class that holds summary results.
3654 """
3656 top_left = [('Dep. Variable:', None),
3657 ('Model:', [self.model.__class__.__name__]),
3658 ('Method:', ['MLE']),
3659 ('Date:', None),
3660 ('Time:', None),
3661 ('converged:', ["%s" % self.mle_retvals['converged']]),
3662 ]
3664 top_right = [('No. Observations:', None),
3665 ('Df Residuals:', None),
3666 ('Df Model:', None),
3667 ('Pseudo R-squ.:', ["%#6.4g" % self.prsquared]),
3668 ('Log-Likelihood:', None),
3669 ('LL-Null:', ["%#8.5g" % self.llnull]),
3670 ('LLR p-value:', ["%#6.4g" % self.llr_pvalue])
3671 ]
3673 if hasattr(self, 'cov_type'):
3674 top_left.append(('Covariance Type:', [self.cov_type]))
3676 if title is None:
3677 title = self.model.__class__.__name__ + ' ' + "Regression Results"
3679 # boiler plate
3680 from statsmodels.iolib.summary import Summary
3681 smry = Summary()
3682 yname, yname_list = self._get_endog_name(yname, yname_list)
3684 # for top of table
3685 smry.add_table_2cols(self, gleft=top_left, gright=top_right,
3686 yname=yname, xname=xname, title=title)
3688 # for parameters, etc
3689 smry.add_table_params(self, yname=yname_list, xname=xname, alpha=alpha,
3690 use_t=self.use_t)
3692 if hasattr(self, 'constraints'):
3693 smry.add_extra_txt(['Model has been estimated subject to linear '
3694 'equality constraints.'])
3696 return smry
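# Illustrative usage sketch, not part of the statsmodels source: a summary()
# call with custom names on a simulated Poisson fit; names and data are invented.
#
#     import numpy as np
#     import statsmodels.api as sm
#
#     rng = np.random.RandomState(2)
#     exog = sm.add_constant(rng.normal(size=(300, 2)))
#     endog = rng.poisson(np.exp(exog @ [0.1, 0.3, -0.2]))
#     res = sm.Poisson(endog, exog).fit(disp=0)
#     print(res.summary(yname='events', xname=['const', 'x1', 'x2'], alpha=0.05))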
3698 def summary2(self, yname=None, xname=None, title=None, alpha=.05,
3699 float_format="%.4f"):
3700 """
3701 Experimental function to summarize regression results.
3703 Parameters
3704 ----------
3705 yname : str
3706 Name of the dependent variable (optional).
3707 xname : list[str], optional
3708 List of strings of length equal to the number of parameters.
3709 Names of the independent variables (optional).
3710 title : str, optional
3711 Title for the top table. If not None, then this replaces the
3712 default title.
3713 alpha : float
3714 The significance level for the confidence intervals.
3715 float_format : str
3716 The print format for floats in parameters summary.
3718 Returns
3719 -------
3720 Summary
3721 Instance that contains the summary tables and text, which can be
3722 printed or converted to various output formats.
3724 See Also
3725 --------
3726 statsmodels.iolib.summary2.Summary : Class that holds summary results.
3727 """
3728 from statsmodels.iolib import summary2
3729 smry = summary2.Summary()
3730 smry.add_base(results=self, alpha=alpha, float_format=float_format,
3731 xname=xname, yname=yname, title=title)
3733 if hasattr(self, 'constraints'):
3734 smry.add_text('Model has been estimated subject to linear '
3735 'equality constraints.')
3737 return smry
3740class CountResults(DiscreteResults):
3741 __doc__ = _discrete_results_docs % {
3742 "one_line_description": "A results class for count data",
3743 "extra_attr": ""}
3744 @cache_readonly
3745 def resid(self):
3746 """
3747 Residuals
3749 Notes
3750 -----
3751 The residuals for Count models are defined as
3753 .. math:: y - p
3755 where :math:`p = \\exp(X\\beta)`. Any exposure and offset variables
3756 are also handled.
3757 """
3758 return self.model.endog - self.predict()
3761class NegativeBinomialResults(CountResults):
3762 __doc__ = _discrete_results_docs % {
3763 "one_line_description": "A results class for NegativeBinomial 1 and 2",
3764 "extra_attr": ""}
3766 @cache_readonly
3767 def lnalpha(self):
3768 """Natural log of alpha"""
3769 return np.log(self.params[-1])
3771 @cache_readonly
3772 def lnalpha_std_err(self):
3773 """Natural log of standardized error"""
3774 return self.bse[-1] / self.params[-1]
3776 @cache_readonly
3777 def aic(self):
3778 # k_extra counts the estimated dispersion parameter alpha
3779 k_extra = getattr(self.model, 'k_extra', 0)
3780 return -2*(self.llf - (self.df_model + self.k_constant + k_extra))
3782 @cache_readonly
3783 def bic(self):
3784 # k_extra counts the estimated dispersion parameter alpha
3785 k_extra = getattr(self.model, 'k_extra', 0)
3786 return -2*self.llf + np.log(self.nobs)*(self.df_model +
3787 self.k_constant + k_extra)
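# Illustrative usage sketch, not part of the statsmodels source: alpha is the
# last entry of params for NegativeBinomial, so lnalpha and its delta-method
# standard error can be reproduced directly; the data below are simulated.
#
#     import numpy as np
#     import statsmodels.api as sm
#
#     rng = np.random.RandomState(3)
#     exog = sm.add_constant(rng.normal(size=(500, 1)))
#     mu = np.exp(exog @ [0.5, 0.3])
#     endog = rng.negative_binomial(2, 2.0 / (2.0 + mu))   # NB2-style counts
#     res = sm.NegativeBinomial(endog, exog).fit(disp=0)
#     assert np.allclose(res.lnalpha, np.log(res.params[-1]))
#     assert np.allclose(res.lnalpha_std_err, res.bse[-1] / res.params[-1])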
3790class GeneralizedPoissonResults(NegativeBinomialResults):
3791 __doc__ = _discrete_results_docs % {
3792 "one_line_description": "A results class for Generalized Poisson",
3793 "extra_attr": ""}
3795 @cache_readonly
3796 def _dispersion_factor(self):
3797 p = getattr(self.model, 'parameterization', 0)
3798 mu = self.predict()
3799 return (1 + self.params[-1] * mu**p)**2
3801class L1CountResults(DiscreteResults):
3802 __doc__ = _discrete_results_docs % {"one_line_description" :
3803 "A results class for count data fit by l1 regularization",
3804 "extra_attr" : _l1_results_attr}
3806 def __init__(self, model, cntfit):
3807 super(L1CountResults, self).__init__(model, cntfit)
3808 # self.trimmed is a boolean array with T/F telling whether or not that
3809 # entry in params has been zeroed out.
3810 self.trimmed = cntfit.mle_retvals['trimmed']
3811 self.nnz_params = (~self.trimmed).sum()
3813 # Set degrees of freedom. In doing so,
3814 # adjust for extra parameter in NegativeBinomial nb1 and nb2
3815 # extra parameter is not included in df_model
3816 k_extra = getattr(self.model, 'k_extra', 0)
3818 self.df_model = self.nnz_params - 1 - k_extra
3819 self.df_resid = float(self.model.endog.shape[0] - self.nnz_params) + k_extra
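# Illustrative usage sketch, not part of the statsmodels source: fitting a
# count model with l1 regularization; trimmed and nnz_params drive the
# degrees-of-freedom adjustment above. Data and the penalty weight are invented.
#
#     import numpy as np
#     import statsmodels.api as sm
#
#     rng = np.random.RandomState(4)
#     exog = sm.add_constant(rng.normal(size=(400, 5)))    # several irrelevant columns
#     endog = rng.poisson(np.exp(exog @ [0.2, 0.8, 0.0, 0.0, 0.0, 0.0]))
#     res = sm.Poisson(endog, exog).fit_regularized(method='l1', alpha=10.0,
#                                                   trim_mode='auto', disp=0)
#     res.trimmed      # boolean array, True where a coefficient was zeroed out
#     res.nnz_params   # number of nonzero parameters
#     res.df_model     # nnz_params - 1 (minus k_extra for NegativeBinomial)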
3821class PoissonResults(CountResults):
3823 def predict_prob(self, n=None, exog=None, exposure=None, offset=None,
3824 transform=True):
3825 """
3826 Return predicted probability of each count level for each observation
3828 Parameters
3829 ----------
3830 n : array_like or int
3831 The counts for which you want the probabilities. If n is None
3832 then the probabilities for each count from 0 to max(y) are
3833 given.
3835 Returns
3836 -------
3837 ndarray
3838 A nobs x len(`n`) array where the columns are indexed by the count
3839 n. If n is None, then column 0 is the probability that each
3840 observation is 0, column 1 is the probability that each
3841 observation is 1, etc.
3842 """
3843 if n is not None:
3844 counts = np.atleast_2d(n)
3845 else:
3846 counts = np.atleast_2d(np.arange(0, np.max(self.model.endog)+1))
3847 mu = self.predict(exog=exog, exposure=exposure, offset=offset,
3848 transform=transform, linear=False)[:,None]
3849 # uses broadcasting
3850 return stats.poisson.pmf(counts, mu)
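# Illustrative usage sketch, not part of the statsmodels source: predicted
# count probabilities from a simulated Poisson fit; data are invented.
#
#     import numpy as np
#     import statsmodels.api as sm
#
#     rng = np.random.RandomState(5)
#     exog = sm.add_constant(rng.normal(size=(300, 1)))
#     endog = rng.poisson(np.exp(exog @ [0.2, 0.5]))
#     res = sm.Poisson(endog, exog).fit(disp=0)
#     probs = res.predict_prob()       # nobs x (max(endog) + 1); column k is P(Y = k)
#     probs.sum(axis=1)                # close to 1 when max(endog) covers most mass
#     res.predict_prob(n=[0, 1, 2])    # only P(Y = 0), P(Y = 1), P(Y = 2)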
3852 @property
3853 def resid_pearson(self):
3854 """
3855 Pearson residuals
3857 Notes
3858 -----
3859 Pearson residuals are defined to be
3861 .. math:: r_j = \\frac{(y - M_jp_j)}{\\sqrt{M_jp_j(1-p_j)}}
3863 where :math:`p_j=cdf(X\\beta)` and :math:`M_j` is the total number of
3864 observations sharing the covariate pattern :math:`j`.
3866 For now :math:`M_j` is always set to 1.
3867 """
3868 # Pearson residuals
3869 p = self.predict() # fittedvalues is still linear
3870 return (self.model.endog - p)/np.sqrt(p)
3873class L1PoissonResults(L1CountResults, PoissonResults):
3874 pass
3876class L1NegativeBinomialResults(L1CountResults, NegativeBinomialResults):
3877 pass
3879class L1GeneralizedPoissonResults(L1CountResults, GeneralizedPoissonResults):
3880 pass
3882class OrderedResults(DiscreteResults):
3883 __doc__ = _discrete_results_docs % {"one_line_description" : "A results class for ordered discrete data." , "extra_attr" : ""}
3884 pass
3886class BinaryResults(DiscreteResults):
3887 __doc__ = _discrete_results_docs % {"one_line_description" : "A results class for binary data", "extra_attr" : ""}
3889 def pred_table(self, threshold=.5):
3890 """
3891 Prediction table
3893 Parameters
3894 ----------
3895 threshold : scalar
3896 Number between 0 and 1. Threshold above which a prediction is
3897 considered 1 and below which a prediction is considered 0.
3899 Notes
3900 -----
3901 pred_table[i,j] refers to the number of times "i" was observed and
3902 the model predicted "j". Correct predictions are along the diagonal.
3903 """
3904 model = self.model
3905 actual = model.endog
3906 pred = np.array(self.predict() > threshold, dtype=float)
3907 bins = np.array([0, 0.5, 1])
3908 return np.histogram2d(actual, pred, bins=bins)[0]
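# Illustrative usage sketch, not part of the statsmodels source: a 2x2
# prediction table for a simulated Logit fit; rows are observed 0/1, columns
# are predicted 0/1. Data and coefficients are invented.
#
#     import numpy as np
#     import statsmodels.api as sm
#
#     rng = np.random.RandomState(6)
#     exog = sm.add_constant(rng.normal(size=(400, 2)))
#     endog = (exog @ [0.0, 1.5, -1.0] + rng.logistic(size=400) > 0).astype(float)
#     res = sm.Logit(endog, exog).fit(disp=0)
#     table = res.pred_table(threshold=0.5)
#     accuracy = np.diag(table).sum() / table.sum()   # share of correct predictions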
3910 @Appender(DiscreteResults.summary.__doc__)
3911 def summary(self, yname=None, xname=None, title=None, alpha=.05,
3912 yname_list=None):
3913 smry = super(BinaryResults, self).summary(yname, xname, title, alpha,
3914 yname_list)
3915 fittedvalues = self.model.cdf(self.fittedvalues)
3916 absprederror = np.abs(self.model.endog - fittedvalues)
3917 predclose_sum = (absprederror < 1e-4).sum()
3918 predclose_frac = predclose_sum / len(fittedvalues)
3920 # add warnings/notes
3921 etext = []
3922 if predclose_sum == len(fittedvalues): # TODO: nobs?
3923 wstr = "Complete Separation: The results show that there is"
3924 wstr += "complete separation.\n"
3925 wstr += "In this case the Maximum Likelihood Estimator does "
3926 wstr += "not exist and the parameters\n"
3927 wstr += "are not identified."
3928 etext.append(wstr)
3929 elif predclose_frac > 0.1: # TODO: get better diagnosis
3930 wstr = "Possibly complete quasi-separation: A fraction "
3931 wstr += "%4.2f of observations can be\n" % predclose_frac
3932 wstr += "perfectly predicted. This might indicate that there "
3933 wstr += "is complete\nquasi-separation. In this case some "
3934 wstr += "parameters will not be identified."
3935 etext.append(wstr)
3936 if etext:
3937 smry.add_extra_txt(etext)
3938 return smry
3940 @cache_readonly
3941 def resid_dev(self):
3942 """
3943 Deviance residuals
3945 Notes
3946 -----
3947 Deviance residuals are defined
3949 .. math:: d_j = \\pm\\left(2\\left[Y_j\\ln\\left(\\frac{Y_j}{M_jp_j}\\right) + (M_j - Y_j)\\ln\\left(\\frac{M_j-Y_j}{M_j(1-p_j)} \\right) \\right] \\right)^{1/2}
3951 where
3953 :math:`p_j = cdf(X\\beta)` and :math:`M_j` is the total number of
3954 observations sharing the covariate pattern :math:`j`.
3956 For now :math:`M_j` is always set to 1.
3957 """
3958 #These are the deviance residuals
3959 #model = self.model
3960 endog = self.model.endog
3961 #exog = model.exog
3962 # M = # of individuals that share a covariate pattern
3963 # e.g. M[i] = 2 if observation i shares its covariate pattern with one other observation
3964 M = 1
3965 p = self.predict()
3966 #Y_0 = np.where(exog == 0)
3967 #Y_M = np.where(exog == M)
3968 #NOTE: Common covariate patterns are not yet handled
3969 res = -(1-endog)*np.sqrt(2*M*np.abs(np.log(1-p))) + \
3970 endog*np.sqrt(2*M*np.abs(np.log(p)))
3971 return res
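# Illustrative usage sketch, not part of the statsmodels source: with M_j = 1
# (Bernoulli data) the saturated log-likelihood is zero, so the squared
# deviance residuals sum to the deviance, -2*llf; the data are simulated.
#
#     import numpy as np
#     import statsmodels.api as sm
#
#     rng = np.random.RandomState(7)
#     exog = sm.add_constant(rng.normal(size=(250, 2)))
#     endog = (exog @ [0.3, 1.0, -0.7] + rng.logistic(size=250) > 0).astype(float)
#     res = sm.Logit(endog, exog).fit(disp=0)
#     assert np.allclose((res.resid_dev ** 2).sum(), -2 * res.llf)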
3973 @cache_readonly
3974 def resid_pearson(self):
3975 """
3976 Pearson residuals
3978 Notes
3979 -----
3980 Pearson residuals are defined to be
3982 .. math:: r_j = \\frac{(y - M_jp_j)}{\\sqrt{M_jp_j(1-p_j)}}
3984 where :math:`p_j=cdf(X\\beta)` and :math:`M_j` is the total number of
3985 observations sharing the covariate pattern :math:`j`.
3987 For now :math:`M_j` is always set to 1.
3988 """
3989 # Pearson residuals
3990 #model = self.model
3991 endog = self.model.endog
3992 #exog = model.exog
3993 # M = # of individuals that share a covariate pattern
3994 # e.g. M[i] = 2 if observation i shares its covariate pattern with one other observation
3995 # use unique row pattern?
3996 M = 1
3997 p = self.predict()
3998 return (endog - M*p)/np.sqrt(M*p*(1-p))
4000 @cache_readonly
4001 def resid_response(self):
4002 """
4003 The response residuals
4005 Notes
4006 -----
4007 Response residuals are defined to be
4009 .. math:: y - p
4011 where :math:`p=cdf(X\\beta)`.
4012 """
4013 return self.model.endog - self.predict()
4016class LogitResults(BinaryResults):
4017 __doc__ = _discrete_results_docs % {
4018 "one_line_description": "A results class for Logit Model",
4019 "extra_attr": ""}
4021 @cache_readonly
4022 def resid_generalized(self):
4023 """
4024 Generalized residuals
4026 Notes
4027 -----
4028 The generalized residuals for the Logit model are defined
4030 .. math:: y - p
4032 where :math:`p=cdf(X\\beta)`. This is the same as the `resid_response`
4033 for the Logit model.
4034 """
4035 # Generalized residuals
4036 return self.model.endog - self.predict()
4039class ProbitResults(BinaryResults):
4040 __doc__ = _discrete_results_docs % {
4041 "one_line_description": "A results class for Probit Model",
4042 "extra_attr": ""}
4044 @cache_readonly
4045 def resid_generalized(self):
4046 """
4047 Generalized residuals
4049 Notes
4050 -----
4051 The generalized residuals for the Probit model are defined
4053 .. math:: y\\frac{\\phi(X\\beta)}{\\Phi(X\\beta)}-(1-y)\\frac{\\phi(X\\beta)}{1-\\Phi(X\\beta)}
4054 """
4055 # generalized residuals
4056 model = self.model
4057 endog = model.endog
4058 XB = self.predict(linear=True)
4059 pdf = model.pdf(XB)
4060 cdf = model.cdf(XB)
4061 return endog * pdf/cdf - (1-endog)*pdf/(1-cdf)
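# Illustrative usage sketch, not part of the statsmodels source: the Probit
# generalized residuals are the per-observation score factors, so at the MLE
# they are numerically orthogonal to the regressors; the data are simulated.
#
#     import numpy as np
#     import statsmodels.api as sm
#
#     rng = np.random.RandomState(8)
#     exog = sm.add_constant(rng.normal(size=(300, 2)))
#     endog = (exog @ [0.2, 0.8, -0.5] + rng.normal(size=300) > 0).astype(float)
#     res = sm.Probit(endog, exog).fit(disp=0)
#     gres = res.resid_generalized
#     np.abs(exog.T @ gres).max()   # approximately zero (first-order conditions)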
4063class L1BinaryResults(BinaryResults):
4064 __doc__ = _discrete_results_docs % {"one_line_description" :
4065 "Results instance for binary data fit by l1 regularization",
4066 "extra_attr" : _l1_results_attr}
4067 def __init__(self, model, bnryfit):
4068 super(L1BinaryResults, self).__init__(model, bnryfit)
4069 # self.trimmed is a boolean array with T/F telling whether or not that
4070 # entry in params has been zeroed out.
4071 self.trimmed = bnryfit.mle_retvals['trimmed']
4072 self.nnz_params = (~self.trimmed).sum()
4073 self.df_model = self.nnz_params - 1
4074 self.df_resid = float(self.model.endog.shape[0] - self.nnz_params)
4077class MultinomialResults(DiscreteResults):
4078 __doc__ = _discrete_results_docs % {"one_line_description" :
4079 "A results class for multinomial data", "extra_attr" : ""}
4081 def __init__(self, model, mlefit):
4082 super(MultinomialResults, self).__init__(model, mlefit)
4083 self.J = model.J
4084 self.K = model.K
4086 @staticmethod
4087 def _maybe_convert_ynames_int(ynames):
4088 # see if they're integers
4089 issue_warning = False
4090 msg = ('endog contains values that are not int-like. The string '
4091 'representation of each value is used instead. Use '
4092 'integer-valued endog to suppress this warning.')
4093 for i in ynames:
4094 try:
4095 if ynames[i] % 1 == 0:
4096 ynames[i] = str(int(ynames[i]))
4097 else:
4098 issue_warning = True
4099 ynames[i] = str(ynames[i])
4100 except TypeError:
4101 ynames[i] = str(ynames[i])
4102 if issue_warning:
4103 import warnings
4104 warnings.warn(msg, SpecificationWarning)
4106 return ynames
4108 def _get_endog_name(self, yname, yname_list, all=False):
4109 """
4110 If all is False, the first variable name is dropped
4111 """
4112 model = self.model
4113 if yname is None:
4114 yname = model.endog_names
4115 if yname_list is None:
4116 ynames = model._ynames_map
4117 ynames = self._maybe_convert_ynames_int(ynames)
4118 # use range below to ensure sortedness
4119 ynames = [ynames[key] for key in range(int(model.J))]
4120 ynames = ['='.join([yname, name]) for name in ynames]
4121 if not all:
4122 yname_list = ynames[1:] # assumes first variable is dropped
4123 else:
4124 yname_list = ynames
4125 return yname, yname_list
4127 def pred_table(self):
4128 """
4129 Returns the J x J prediction table.
4131 Notes
4132 -----
4133 pred_table[i,j] refers to the number of times "i" was observed and
4134 the model predicted "j". Correct predictions are along the diagonal.
4135 """
4136 ju = self.model.J - 1 # highest index
4137 # these are the actual, predicted indices
4138 #idx = lzip(self.model.endog, self.predict().argmax(1))
4139 bins = np.concatenate(([0], np.linspace(0.5, ju - 0.5, ju), [ju]))
4140 return np.histogram2d(self.model.endog, self.predict().argmax(1),
4141 bins=bins)[0]
4143 @cache_readonly
4144 def bse(self):
4145 bse = np.sqrt(np.diag(self.cov_params()))
4146 return bse.reshape(self.params.shape, order='F')
4148 @cache_readonly
4149 def aic(self):
4150 return -2*(self.llf - (self.df_model+self.model.J-1))
4152 @cache_readonly
4153 def bic(self):
4154 return -2*self.llf + np.log(self.nobs)*(self.df_model+self.model.J-1)
4156 def conf_int(self, alpha=.05, cols=None):
4157 confint = super(DiscreteResults, self).conf_int(alpha=alpha,
4158 cols=cols)
4159 return confint.transpose(2,0,1)
4161 def margeff(self):
4162 raise NotImplementedError("Use get_margeff instead")
4164 @cache_readonly
4165 def resid_misclassified(self):
4166 """
4167 Residuals indicating which observations are misclassified.
4169 Notes
4170 -----
4171 The residuals for the multinomial model are defined as
4173 .. math:: argmax(y_i) \\neq argmax(p_i)
4175 where :math:`argmax(y_i)` is the index of the category for the
4176 endogenous variable and :math:`argmax(p_i)` is the index of the
4177 predicted probabilities for each category. That is, the residual
4178 is a binary indicator that is 0 if the category with the highest
4179 predicted probability is the same as that of the observed variable
4180 and 1 otherwise.
4181 """
4182 # it's 0 or 1 - 0 for correct prediction and 1 for a missed one
4183 return (self.model.wendog.argmax(1) !=
4184 self.predict().argmax(1)).astype(float)
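# Illustrative usage sketch, not part of the statsmodels source: the J x J
# prediction table and the misclassification indicator for a simulated MNLogit
# fit; data and coefficients are invented.
#
#     import numpy as np
#     import statsmodels.api as sm
#
#     rng = np.random.RandomState(9)
#     exog = sm.add_constant(rng.normal(size=(600, 2)))
#     beta = np.array([[0.0, 0.5, -0.5], [0.0, 1.0, 0.3], [0.0, -0.4, 0.8]])
#     endog = (exog @ beta.T + rng.gumbel(size=(600, 3))).argmax(axis=1)  # 3 classes
#     res = sm.MNLogit(endog, exog).fit(disp=0)
#     res.pred_table()                 # 3 x 3 table, correct counts on the diagonal
#     res.resid_misclassified.mean()   # fraction of misclassified observations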
4186 def summary2(self, alpha=0.05, float_format="%.4f"):
4187 """Experimental function to summarize regression results
4189 Parameters
4190 ----------
4191 alpha : float
4192 significance level for the confidence intervals
4193 float_format : str
4194 print format for floats in parameters summary
4196 Returns
4197 -------
4198 smry : Summary instance
4199 this holds the summary tables and text, which can be printed or
4200 converted to various output formats.
4202 See Also
4203 --------
4204 statsmodels.iolib.summary2.Summary : class to hold summary results
4205 """
4207 from statsmodels.iolib import summary2
4208 smry = summary2.Summary()
4209 smry.add_dict(summary2.summary_model(self))
4210 # One data frame per value of endog
4211 eqn = self.params.shape[1]
4212 confint = self.conf_int(alpha)
4213 for i in range(eqn):
4214 coefs = summary2.summary_params((self, self.params[:, i],
4215 self.bse[:, i],
4216 self.tvalues[:, i],
4217 self.pvalues[:, i],
4218 confint[i]),
4219 alpha=alpha)
4220 # Header must show value of endog
4221 level_str = self.model.endog_names + ' = ' + str(i)
4222 coefs[level_str] = coefs.index
4223 coefs = coefs.iloc[:, [-1, 0, 1, 2, 3, 4, 5]]
4224 smry.add_df(coefs, index=False, header=True,
4225 float_format=float_format)
4226 smry.add_title(results=self)
4227 return smry
4230class L1MultinomialResults(MultinomialResults):
4231 __doc__ = _discrete_results_docs % {"one_line_description" :
4232 "A results class for multinomial data fit by l1 regularization",
4233 "extra_attr" : _l1_results_attr}
4234 def __init__(self, model, mlefit):
4235 super(L1MultinomialResults, self).__init__(model, mlefit)
4236 # self.trimmed is a boolean array with T/F telling whether or not that
4237 # entry in params has been zeroed out.
4238 self.trimmed = mlefit.mle_retvals['trimmed']
4239 self.nnz_params = (~self.trimmed).sum()
4241 # Note: J-1 constants
4242 self.df_model = self.nnz_params - (self.model.J - 1)
4243 self.df_resid = float(self.model.endog.shape[0] - self.nnz_params)
4246#### Results Wrappers ####
4248class OrderedResultsWrapper(lm.RegressionResultsWrapper):
4249 pass
4252wrap.populate_wrapper(OrderedResultsWrapper, OrderedResults)
4255class CountResultsWrapper(lm.RegressionResultsWrapper):
4256 pass
4259wrap.populate_wrapper(CountResultsWrapper, CountResults)
4262class NegativeBinomialResultsWrapper(lm.RegressionResultsWrapper):
4263 pass
4266wrap.populate_wrapper(NegativeBinomialResultsWrapper,
4267 NegativeBinomialResults)
4270class GeneralizedPoissonResultsWrapper(lm.RegressionResultsWrapper):
4271 pass
4274wrap.populate_wrapper(GeneralizedPoissonResultsWrapper,
4275 GeneralizedPoissonResults)
4278class PoissonResultsWrapper(lm.RegressionResultsWrapper):
4279 pass
4282wrap.populate_wrapper(PoissonResultsWrapper, PoissonResults)
4285class L1CountResultsWrapper(lm.RegressionResultsWrapper):
4286 pass
4289class L1PoissonResultsWrapper(lm.RegressionResultsWrapper):
4290 pass
4293wrap.populate_wrapper(L1PoissonResultsWrapper, L1PoissonResults)
4296class L1NegativeBinomialResultsWrapper(lm.RegressionResultsWrapper):
4297 pass
4300wrap.populate_wrapper(L1NegativeBinomialResultsWrapper,
4301 L1NegativeBinomialResults)
4304class L1GeneralizedPoissonResultsWrapper(lm.RegressionResultsWrapper):
4305 pass
4308wrap.populate_wrapper(L1GeneralizedPoissonResultsWrapper,
4309 L1GeneralizedPoissonResults)
4312class BinaryResultsWrapper(lm.RegressionResultsWrapper):
4313 _attrs = {"resid_dev": "rows",
4314 "resid_generalized": "rows",
4315 "resid_pearson": "rows",
4316 "resid_response": "rows"
4317 }
4318 _wrap_attrs = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_attrs,
4319 _attrs)
4322wrap.populate_wrapper(BinaryResultsWrapper, BinaryResults)
4325class L1BinaryResultsWrapper(lm.RegressionResultsWrapper):
4326 pass
4329wrap.populate_wrapper(L1BinaryResultsWrapper, L1BinaryResults)
4332class MultinomialResultsWrapper(lm.RegressionResultsWrapper):
4333 _attrs = {"resid_misclassified": "rows"}
4334 _wrap_attrs = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_attrs,
4335 _attrs)
4336 _methods = {'conf_int': 'multivariate_confint'}
4337 _wrap_methods = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_methods,
4338 _methods)
4341wrap.populate_wrapper(MultinomialResultsWrapper, MultinomialResults)
4344class L1MultinomialResultsWrapper(lm.RegressionResultsWrapper):
4345 pass
4348wrap.populate_wrapper(L1MultinomialResultsWrapper, L1MultinomialResults)