__all__ = ["ZeroInflatedPoisson", "ZeroInflatedGeneralizedPoisson",
           "ZeroInflatedNegativeBinomialP"]

import warnings
import numpy as np
import statsmodels.base.model as base
import statsmodels.base.wrapper as wrap
import statsmodels.regression.linear_model as lm
from statsmodels.discrete.discrete_model import (DiscreteModel, CountModel,
                                                 Poisson, Logit, CountResults,
                                                 L1CountResults, Probit,
                                                 _discrete_results_docs,
                                                 _validate_l1_method,
                                                 GeneralizedPoisson,
                                                 NegativeBinomialP)
from statsmodels.distributions import zipoisson, zigenpoisson, zinegbin
from statsmodels.tools.numdiff import approx_fprime, approx_hess
from statsmodels.tools.decorators import cache_readonly
from statsmodels.tools.sm_exceptions import ConvergenceWarning
from statsmodels.compat.pandas import Appender


_doc_zi_params = """
    exog_infl : array_like or None
        Explanatory variables for the binary inflation model, i.e. for
        mixing probability model. If None, then a constant is used.
    offset : array_like
        Offset is added to the linear prediction with coefficient equal to 1.
    exposure : array_like
        Log(exposure) is added to the linear prediction with coefficient
        equal to 1.
    inflation : {'logit', 'probit'}
        The model for the zero inflation, either Logit (default) or Probit
    """


class GenericZeroInflated(CountModel):
    __doc__ = """
    Generic Zero Inflated Model

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : ndarray
        A reference to the endogenous response variable
    exog : ndarray
        A reference to the exogenous design.
    exog_infl : ndarray
        A reference to the zero-inflated exogenous design.
    """ % {'params': base._model_params_doc,
           'extra_params': _doc_zi_params + base._missing_param_doc}

    def __init__(self, endog, exog, exog_infl=None, offset=None,
                 inflation='logit', exposure=None, missing='none', **kwargs):
        super(GenericZeroInflated, self).__init__(endog, exog, offset=offset,
                                                  exposure=exposure,
                                                  missing=missing, **kwargs)

        if exog_infl is None:
            self.k_inflate = 1
            self.exog_infl = np.ones((endog.size, self.k_inflate),
                                     dtype=np.float64)
        else:
            self.exog_infl = exog_infl
            self.k_inflate = exog_infl.shape[1]

        if len(exog.shape) == 1:
            self.k_exog = 1
        else:
            self.k_exog = exog.shape[1]

        self.infl = inflation
        if inflation == 'logit':
            self.model_infl = Logit(np.zeros(self.exog_infl.shape[0]),
                                    self.exog_infl)
            self._hessian_inflate = self._hessian_logit
        elif inflation == 'probit':
            self.model_infl = Probit(np.zeros(self.exog_infl.shape[0]),
                                     self.exog_infl)
            self._hessian_inflate = self._hessian_probit
        else:
            raise ValueError("inflation == %s, which is not handled"
                             % inflation)

        self.inflation = inflation
        self.k_extra = self.k_inflate

        if len(self.exog) != len(self.exog_infl):
            raise ValueError('exog and exog_infl have different numbers of '
                             'observations. `missing` handling is not '
                             'supported')

        infl_names = ['inflate_%s' % i
                      for i in self.model_infl.data.param_names]
        self.exog_names[:] = infl_names + list(self.exog_names)
        self.exog_infl = np.asarray(self.exog_infl, dtype=np.float64)

        self._init_keys.extend(['exog_infl', 'inflation'])
        self._null_drop_keys = ['exog_infl']

    def loglike(self, params):
        """
        Loglikelihood of Generic Zero Inflated model.

        Parameters
        ----------
        params : array_like
            The parameters of the model.

        Returns
        -------
        loglike : float
            The log-likelihood function of the model evaluated at `params`.
            See notes.

        Notes
        -----
        .. math:: \\ln L=\\sum_{y_{i}=0}\\ln(w_{i}+(1-w_{i})*P_{main\\_model})+
            \\sum_{y_{i}>0}(\\ln(1-w_{i})+L_{main\\_model})

        where P is the pdf of the main model and L is the loglikelihood
        function of the main model.
        """
        return np.sum(self.loglikeobs(params))

    def loglikeobs(self, params):
        """
        Loglikelihood for observations of Generic Zero Inflated model.

        Parameters
        ----------
        params : array_like
            The parameters of the model.

        Returns
        -------
        loglike : ndarray
            The log likelihood for each observation of the model evaluated
            at `params`. See Notes for definition.

        Notes
        -----
        .. math:: \\ln L_{i}=\\ln(w_{i}+(1-w_{i})*P_{main\\_model})
            \\quad\\text{if } y_{i}=0

        .. math:: \\ln L_{i}=\\ln(1-w_{i})+L_{main\\_model}
            \\quad\\text{if } y_{i}>0

        where P is the pdf of the main model and L is the loglikelihood
        function of the main model, for observations :math:`i=1,...,n`.
        """
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        y = self.endog
        w = self.model_infl.predict(params_infl)

        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        llf_main = self.model_main.loglikeobs(params_main)
        zero_idx = np.nonzero(y == 0)[0]
        nonzero_idx = np.nonzero(y)[0]

        llf = np.zeros_like(y, dtype=np.float64)
        llf[zero_idx] = (np.log(w[zero_idx] +
                         (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
        llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]

        return llf
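
    # Numeric illustration of the zero-observation term above: a minimal
    # sketch with hypothetical values w and mu, assuming a Poisson main
    # model, so that exp(llf_main) at y=0 equals poisson.pmf(0, mu).
    #
    #     from scipy import stats
    #     w, mu = 0.3, 2.0
    #     p_zero = w + (1 - w) * stats.poisson.pmf(0, mu)
    #     ll_zero = np.log(p_zero)  # equals llf[zero_idx] for such a row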

    @Appender(DiscreteModel.fit.__doc__)
    def fit(self, start_params=None, method='bfgs', maxiter=35,
            full_output=1, disp=1, callback=None,
            cov_type='nonrobust', cov_kwds=None, use_t=None, **kwargs):
        if start_params is None:
            offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
            if np.size(offset) == 1 and offset == 0:
                offset = None
            start_params = self._get_start_params()

        if callback is None:
            # work around perfect separation callback #3895
            callback = lambda *x: x

        mlefit = super(GenericZeroInflated, self).fit(
            start_params=start_params, maxiter=maxiter, disp=disp,
            method=method, full_output=full_output, callback=callback,
            **kwargs)

        zipfit = self.result_class(self, mlefit._results)
        result = self.result_class_wrapper(zipfit)

        if cov_kwds is None:
            cov_kwds = {}

        result._get_robustcov_results(cov_type=cov_type,
                                      use_self=True, use_t=use_t, **cov_kwds)
        return result

    @Appender(DiscreteModel.fit_regularized.__doc__)
    def fit_regularized(self, start_params=None, method='l1',
                        maxiter='defined_by_method', full_output=1, disp=1,
                        callback=None, alpha=0, trim_mode='auto',
                        auto_trim_tol=0.01, size_trim_tol=1e-4,
                        qc_tol=0.03, **kwargs):

        _validate_l1_method(method)

        if np.size(alpha) == 1 and alpha != 0:
            k_params = self.k_exog + self.k_inflate
            alpha = alpha * np.ones(k_params)

        extra = self.k_extra - self.k_inflate
        alpha_p = alpha[:-(self.k_extra - extra)] if (
            self.k_extra and np.size(alpha) > 1) else alpha
        if start_params is None:
            offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
            if np.size(offset) == 1 and offset == 0:
                offset = None
            start_params = self.model_main.fit_regularized(
                start_params=start_params, method=method, maxiter=maxiter,
                full_output=full_output, disp=0, callback=callback,
                alpha=alpha_p, trim_mode=trim_mode,
                auto_trim_tol=auto_trim_tol,
                size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs).params
            start_params = np.append(np.ones(self.k_inflate), start_params)
        cntfit = super(CountModel, self).fit_regularized(
            start_params=start_params, method=method, maxiter=maxiter,
            full_output=full_output, disp=disp, callback=callback,
            alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
            size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)

        discretefit = self.result_class_reg(self, cntfit)
        return self.result_class_reg_wrapper(discretefit)
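
    # A minimal L1 sketch (hypothetical data names y, x; a scalar alpha is
    # broadcast over the k_exog + k_inflate parameters as shown above):
    #
    #     import statsmodels.api as sm
    #     res_l1 = sm.ZeroInflatedPoisson(y, x).fit_regularized(
    #         alpha=2.0, disp=0)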

    def score_obs(self, params):
        """
        Generic Zero Inflated model score (gradient) vector of the
        log-likelihood.

        Parameters
        ----------
        params : array_like
            The parameters of the model

        Returns
        -------
        score : ndarray, 2-D
            The score vector of the model, i.e. the first derivative of the
            loglikelihood function for each observation, evaluated at
            `params`
        """
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        y = self.endog
        w = self.model_infl.predict(params_infl)
        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        score_main = self.model_main.score_obs(params_main)
        llf_main = self.model_main.loglikeobs(params_main)
        llf = self.loglikeobs(params)
        zero_idx = np.nonzero(y == 0)[0]
        nonzero_idx = np.nonzero(y)[0]

        mu = self.model_main.predict(params_main)

        dldp = np.zeros((self.exog.shape[0], self.k_exog), dtype=np.float64)
        dldw = np.zeros_like(self.exog_infl, dtype=np.float64)

        dldp[zero_idx, :] = (score_main[zero_idx].T *
                             (1 - (w[zero_idx]) / np.exp(llf[zero_idx]))).T
        dldp[nonzero_idx, :] = score_main[nonzero_idx]

        if self.inflation == 'logit':
            dldw[zero_idx, :] = (self.exog_infl[zero_idx].T * w[zero_idx] *
                                 (1 - w[zero_idx]) *
                                 (1 - np.exp(llf_main[zero_idx])) /
                                 np.exp(llf[zero_idx])).T
            dldw[nonzero_idx, :] = -(self.exog_infl[nonzero_idx].T *
                                     w[nonzero_idx]).T
        elif self.inflation == 'probit':
            return approx_fprime(params, self.loglikeobs)

        return np.hstack((dldw, dldp))

    def score(self, params):
        return self.score_obs(params).sum(0)

    def _hessian_main(self, params):
        pass

    def _hessian_logit(self, params):
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        y = self.endog
        w = self.model_infl.predict(params_infl)
        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        score_main = self.model_main.score_obs(params_main)
        llf_main = self.model_main.loglikeobs(params_main)
        llf = self.loglikeobs(params)
        zero_idx = np.nonzero(y == 0)[0]
        nonzero_idx = np.nonzero(y)[0]

        hess_arr = np.zeros((self.k_inflate, self.k_exog + self.k_inflate))

        pmf = np.exp(llf)

        # d2l/dw2
        for i in range(self.k_inflate):
            for j in range(i, -1, -1):
                hess_arr[i, j] = ((
                    self.exog_infl[zero_idx, i] * self.exog_infl[zero_idx, j] *
                    (w[zero_idx] * (1 - w[zero_idx]) * ((1 -
                    np.exp(llf_main[zero_idx])) * (1 - 2 * w[zero_idx]) *
                    np.exp(llf[zero_idx]) - (w[zero_idx] - w[zero_idx]**2) *
                    (1 - np.exp(llf_main[zero_idx]))**2) /
                    pmf[zero_idx]**2)).sum() -
                    (self.exog_infl[nonzero_idx, i] *
                     self.exog_infl[nonzero_idx, j] *
                     w[nonzero_idx] * (1 - w[nonzero_idx])).sum())

        # d2l/dpdw
        for i in range(self.k_inflate):
            for j in range(self.k_exog):
                hess_arr[i, j + self.k_inflate] = -(
                    score_main[zero_idx, j] *
                    w[zero_idx] * (1 - w[zero_idx]) *
                    self.exog_infl[zero_idx, i] / pmf[zero_idx]).sum()

        return hess_arr

    def _hessian_probit(self, params):
        pass

    def hessian(self, params):
        """
        Generic Zero Inflated model Hessian matrix of the loglikelihood.

        Parameters
        ----------
        params : array_like
            The parameters of the model

        Returns
        -------
        hess : ndarray, (k_vars, k_vars)
            The Hessian, second derivative of loglikelihood function,
            evaluated at `params`
        """
        hess_arr_main = self._hessian_main(params)
        hess_arr_infl = self._hessian_inflate(params)

        if hess_arr_main is None or hess_arr_infl is None:
            return approx_hess(params, self.loglike)

        dim = self.k_exog + self.k_inflate

        hess_arr = np.zeros((dim, dim))

        hess_arr[:self.k_inflate, :] = hess_arr_infl
        hess_arr[self.k_inflate:, self.k_inflate:] = hess_arr_main

        tri_idx = np.triu_indices(self.k_exog + self.k_inflate, k=1)
        hess_arr[tri_idx] = hess_arr.T[tri_idx]

        return hess_arr
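
    # A minimal check sketch (assumes `mod` is a constructed instance of a
    # concrete subclass and `params` a conformable vector): the analytic
    # Hessian should agree with the numerical one up to finite-difference
    # error.
    #
    #     from statsmodels.tools.numdiff import approx_hess
    #     np.allclose(mod.hessian(params),
    #                 approx_hess(params, mod.loglike), atol=1e-4)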

    def predict(self, params, exog=None, exog_infl=None, exposure=None,
                offset=None, which='mean'):
        """
        Predict response variable of a count model given exogenous variables.

        Parameters
        ----------
        params : array_like
            The parameters of the model
        exog : ndarray, optional
            A reference to the exogenous design.
            If not supplied, the exog used in fitting is used.
        exog_infl : ndarray, optional
            A reference to the zero-inflated exogenous design.
            If not supplied, the exog_infl used in fitting is used.
        offset : ndarray, optional
            Offset is added to the linear prediction with coefficient
            equal to 1.
        exposure : ndarray, optional
            Log(exposure) is added to the linear prediction with coefficient
            equal to 1. If exposure is specified, then it will be logged by
            the method. The user does not need to log it first.
        which : str, optional
            Defines which value is predicted. One of 'mean', 'mean-main',
            'linear', 'mean-nonzero', 'prob-zero', 'prob', 'prob-main'.
            Default is 'mean'.
        """
        if exog is None:
            exog = self.exog

        if exog_infl is None:
            exog_infl = self.exog_infl

        if exposure is None:
            exposure = getattr(self, 'exposure', 0)
        else:
            exposure = np.log(exposure)

        if offset is None:
            offset = 0

        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        prob_main = 1 - self.model_infl.predict(params_infl, exog_infl)

        lin_pred = (np.dot(exog, params_main[:self.exog.shape[1]]) +
                    exposure + offset)

        # Refactor: This is pretty hacky,
        # there should be an appropriate predict method in model_main.
        # This is just prob(y=0 | model_main).
        tmp_exog = self.model_main.exog
        tmp_endog = self.model_main.endog
        tmp_offset = getattr(self.model_main, 'offset', ['no'])
        tmp_exposure = getattr(self.model_main, 'exposure', ['no'])
        self.model_main.exog = exog
        self.model_main.endog = np.zeros((exog.shape[0]))
        self.model_main.offset = offset
        self.model_main.exposure = exposure
        llf = self.model_main.loglikeobs(params_main)
        self.model_main.exog = tmp_exog
        self.model_main.endog = tmp_endog
        # tmp_offset might be an array with elementwise equality testing
        if len(tmp_offset) == 1 and tmp_offset[0] == 'no':
            del self.model_main.offset
        else:
            self.model_main.offset = tmp_offset
        if len(tmp_exposure) == 1 and tmp_exposure[0] == 'no':
            del self.model_main.exposure
        else:
            self.model_main.exposure = tmp_exposure
        # end hack

        prob_zero = (1 - prob_main) + prob_main * np.exp(llf)

        if which == 'mean':
            return prob_main * np.exp(lin_pred)
        elif which == 'mean-main':
            return np.exp(lin_pred)
        elif which == 'linear':
            return lin_pred
        elif which == 'mean-nonzero':
            return prob_main * np.exp(lin_pred) / (1 - prob_zero)
        elif which == 'prob-zero':
            return prob_zero
        elif which == 'prob-main':
            return prob_main
        elif which == 'prob':
            return self._predict_prob(params, exog, exog_infl, exposure,
                                      offset)
        else:
            raise ValueError('which = %s is not available' % which)
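
    # A minimal prediction sketch, assuming `res` is a fitted results
    # instance of one of the concrete subclasses below (names illustrative):
    #
    #     m = res.predict(which='mean')            # prob_main * exp(x'b)
    #     m_main = res.predict(which='mean-main')  # exp(x'b), no inflation
    #     p0 = res.predict(which='prob-zero')      # mixture P(y = 0)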


class ZeroInflatedPoisson(GenericZeroInflated):
    __doc__ = """
    Poisson Zero Inflated Model

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : ndarray
        A reference to the endogenous response variable
    exog : ndarray
        A reference to the exogenous design.
    exog_infl : ndarray
        A reference to the zero-inflated exogenous design.
    """ % {'params': base._model_params_doc,
           'extra_params': _doc_zi_params + base._missing_param_doc}

    def __init__(self, endog, exog, exog_infl=None, offset=None,
                 exposure=None, inflation='logit', missing='none', **kwargs):
        super(ZeroInflatedPoisson, self).__init__(endog, exog, offset=offset,
                                                  inflation=inflation,
                                                  exog_infl=exog_infl,
                                                  exposure=exposure,
                                                  missing=missing, **kwargs)
        self.model_main = Poisson(self.endog, self.exog, offset=offset,
                                  exposure=exposure)
        self.distribution = zipoisson
        self.result_class = ZeroInflatedPoissonResults
        self.result_class_wrapper = ZeroInflatedPoissonResultsWrapper
        self.result_class_reg = L1ZeroInflatedPoissonResults
        self.result_class_reg_wrapper = L1ZeroInflatedPoissonResultsWrapper

    def _hessian_main(self, params):
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        y = self.endog
        w = self.model_infl.predict(params_infl)
        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        score = self.score(params)
        zero_idx = np.nonzero(y == 0)[0]
        nonzero_idx = np.nonzero(y)[0]

        mu = self.model_main.predict(params_main)

        hess_arr = np.zeros((self.k_exog, self.k_exog))

        coeff = (1 + w[zero_idx] * (np.exp(mu[zero_idx]) - 1))

        # d2l/dp2
        for i in range(self.k_exog):
            for j in range(i, -1, -1):
                hess_arr[i, j] = ((
                    self.exog[zero_idx, i] * self.exog[zero_idx, j] *
                    mu[zero_idx] * (w[zero_idx] - 1) * (1 / coeff -
                    w[zero_idx] * mu[zero_idx] * np.exp(mu[zero_idx]) /
                    coeff**2)).sum() -
                    (mu[nonzero_idx] * self.exog[nonzero_idx, i] *
                     self.exog[nonzero_idx, j]).sum())

        return hess_arr

    def _predict_prob(self, params, exog, exog_infl, exposure, offset):
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        counts = np.atleast_2d(np.arange(0, np.max(self.endog) + 1))

        if len(exog_infl.shape) < 2:
            transform = True
            w = np.atleast_2d(
                self.model_infl.predict(params_infl, exog_infl))[:, None]
        else:
            transform = False
            w = self.model_infl.predict(params_infl, exog_infl)[:, None]

        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        mu = self.model_main.predict(params_main, exog,
                                     offset=offset)[:, None]
        result = self.distribution.pmf(counts, mu, w)
        return result[0] if transform else result

    def _get_start_params(self):
        start_params = self.model_main.fit(disp=0, method="nm").params
        start_params = np.append(np.ones(self.k_inflate) * 0.1, start_params)
        return start_params
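
    # A minimal end-to-end sketch on simulated data, assuming the class is
    # exposed as sm.ZeroInflatedPoisson (variable names are illustrative):
    #
    #     import numpy as np
    #     import statsmodels.api as sm
    #     rng = np.random.default_rng(0)
    #     x = sm.add_constant(rng.normal(size=1000))
    #     y = rng.poisson(np.exp(x @ np.array([1.0, 0.5])))
    #     y[rng.uniform(size=1000) < 0.3] = 0  # inject excess zeros
    #     res = sm.ZeroInflatedPoisson(y, x).fit(disp=0)
    #     print(res.summary())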


class ZeroInflatedGeneralizedPoisson(GenericZeroInflated):
    __doc__ = """
    Zero Inflated Generalized Poisson Model

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : ndarray
        A reference to the endogenous response variable
    exog : ndarray
        A reference to the exogenous design.
    exog_infl : ndarray
        A reference to the zero-inflated exogenous design.
    p : scalar
        P denotes parametrizations for ZIGP regression. p=1 for ZIGP-1 and
        p=2 for ZIGP-2. Default is p=2.
    """ % {'params': base._model_params_doc,
           'extra_params': _doc_zi_params +
           """p : float
        dispersion power parameter for the GeneralizedPoisson model. p=1 for
        ZIGP-1 and p=2 for ZIGP-2. Default is p=2
    """ + base._missing_param_doc}

    def __init__(self, endog, exog, exog_infl=None, offset=None,
                 exposure=None, inflation='logit', p=2, missing='none',
                 **kwargs):
        super(ZeroInflatedGeneralizedPoisson, self).__init__(
            endog, exog, offset=offset, inflation=inflation,
            exog_infl=exog_infl, exposure=exposure,
            missing=missing, **kwargs)
        self.model_main = GeneralizedPoisson(self.endog, self.exog,
                                             offset=offset,
                                             exposure=exposure, p=p)
        self.distribution = zigenpoisson
        self.k_exog += 1
        self.k_extra += 1
        self.exog_names.append("alpha")
        self.result_class = ZeroInflatedGeneralizedPoissonResults
        self.result_class_wrapper = \
            ZeroInflatedGeneralizedPoissonResultsWrapper
        self.result_class_reg = L1ZeroInflatedGeneralizedPoissonResults
        self.result_class_reg_wrapper = \
            L1ZeroInflatedGeneralizedPoissonResultsWrapper

    def _get_init_kwds(self):
        kwds = super(ZeroInflatedGeneralizedPoisson, self)._get_init_kwds()
        kwds['p'] = self.model_main.parameterization + 1
        return kwds

    def _predict_prob(self, params, exog, exog_infl, exposure, offset):
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        p = self.model_main.parameterization
        counts = np.atleast_2d(np.arange(0, np.max(self.endog) + 1))

        if len(exog_infl.shape) < 2:
            transform = True
            w = np.atleast_2d(
                self.model_infl.predict(params_infl, exog_infl))[:, None]
        else:
            transform = False
            w = self.model_infl.predict(params_infl, exog_infl)[:, None]

        w[w == 1.] = np.nextafter(1, 0)
        mu = self.model_main.predict(params_main, exog,
                                     exposure=exposure, offset=offset)[:, None]
        result = self.distribution.pmf(counts, mu, params_main[-1], p, w)
        return result[0] if transform else result

    def _get_start_params(self):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=ConvergenceWarning)
            start_params = ZeroInflatedPoisson(self.endog, self.exog,
                exog_infl=self.exog_infl).fit(disp=0).params
        start_params = np.append(start_params, 0.1)
        return start_params
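
    # A minimal usage sketch (y, x as in the ZeroInflatedPoisson example
    # above; p=1 selects the ZIGP-1 parameterization):
    #
    #     res_zigp = sm.ZeroInflatedGeneralizedPoisson(y, x, p=1).fit(disp=0)
    #     print(res_zigp.params)  # inflate const, main coefficients, alpha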


class ZeroInflatedNegativeBinomialP(GenericZeroInflated):
    __doc__ = """
    Zero Inflated Generalized Negative Binomial Model

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : ndarray
        A reference to the endogenous response variable
    exog : ndarray
        A reference to the exogenous design.
    exog_infl : ndarray
        A reference to the zero-inflated exogenous design.
    p : scalar
        P denotes parametrizations for ZINB regression. p=1 for ZINB-1 and
        p=2 for ZINB-2. Default is p=2
    """ % {'params': base._model_params_doc,
           'extra_params': _doc_zi_params +
           """p : float
        dispersion power parameter for the NegativeBinomialP model. p=1 for
        ZINB-1 and p=2 for ZINB-2. Default is p=2
    """ + base._missing_param_doc}

    def __init__(self, endog, exog, exog_infl=None, offset=None,
                 exposure=None, inflation='logit', p=2, missing='none',
                 **kwargs):
        super(ZeroInflatedNegativeBinomialP, self).__init__(
            endog, exog, offset=offset, inflation=inflation,
            exog_infl=exog_infl, exposure=exposure,
            missing=missing, **kwargs)
        self.model_main = NegativeBinomialP(self.endog, self.exog,
                                            offset=offset,
                                            exposure=exposure, p=p)
        self.distribution = zinegbin
        self.k_exog += 1
        self.k_extra += 1
        self.exog_names.append("alpha")
        self.result_class = ZeroInflatedNegativeBinomialResults
        self.result_class_wrapper = ZeroInflatedNegativeBinomialResultsWrapper
        self.result_class_reg = L1ZeroInflatedNegativeBinomialResults
        self.result_class_reg_wrapper = \
            L1ZeroInflatedNegativeBinomialResultsWrapper

    def _get_init_kwds(self):
        kwds = super(ZeroInflatedNegativeBinomialP, self)._get_init_kwds()
        kwds['p'] = self.model_main.parameterization
        return kwds

    def _predict_prob(self, params, exog, exog_infl, exposure, offset):
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        p = self.model_main.parameterization
        counts = np.arange(0, np.max(self.endog) + 1)

        if len(exog_infl.shape) < 2:
            transform = True
            w = np.atleast_2d(
                self.model_infl.predict(params_infl, exog_infl))[:, None]
        else:
            transform = False
            w = self.model_infl.predict(params_infl, exog_infl)[:, None]

        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        mu = self.model_main.predict(params_main, exog,
                                     exposure=exposure, offset=offset)[:, None]
        result = self.distribution.pmf(counts, mu, params_main[-1], p, w)
        return result[0] if transform else result

    def _get_start_params(self):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=ConvergenceWarning)
            start_params = self.model_main.fit(disp=0, method='nm').params
        start_params = np.append(np.zeros(self.k_inflate), start_params)
        return start_params
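
    # A minimal usage sketch (y, x as in the ZeroInflatedPoisson example
    # above; p=2 selects the NB-2 dispersion form):
    #
    #     res_zinb = sm.ZeroInflatedNegativeBinomialP(y, x, p=2).fit(disp=0)
    #     p0 = res_zinb.predict(which='prob-zero')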


class ZeroInflatedPoissonResults(CountResults):
    __doc__ = _discrete_results_docs % {
        "one_line_description": "A results class for Zero Inflated Poisson",
        "extra_attr": ""}

    @cache_readonly
    def _dispersion_factor(self):
        mu = self.predict(which='linear')
        w = 1 - self.predict() / np.exp(self.predict(which='linear'))
        return (1 + w * np.exp(mu))

    def get_margeff(self, at='overall', method='dydx', atexog=None,
                    dummy=False, count=False):
        """Get marginal effects of the fitted model.

        Not yet implemented for Zero Inflated Models
        """
        raise NotImplementedError("not yet implemented for zero inflation")


class L1ZeroInflatedPoissonResults(L1CountResults,
                                   ZeroInflatedPoissonResults):
    pass


class ZeroInflatedPoissonResultsWrapper(lm.RegressionResultsWrapper):
    pass
wrap.populate_wrapper(ZeroInflatedPoissonResultsWrapper,
                      ZeroInflatedPoissonResults)


class L1ZeroInflatedPoissonResultsWrapper(lm.RegressionResultsWrapper):
    pass
wrap.populate_wrapper(L1ZeroInflatedPoissonResultsWrapper,
                      L1ZeroInflatedPoissonResults)


class ZeroInflatedGeneralizedPoissonResults(CountResults):
    __doc__ = _discrete_results_docs % {
        "one_line_description":
            "A results class for Zero Inflated Generalized Poisson",
        "extra_attr": ""}

    @cache_readonly
    def _dispersion_factor(self):
        p = self.model.model_main.parameterization
        alpha = self.params[self.model.k_inflate:][-1]
        mu = np.exp(self.predict(which='linear'))
        w = 1 - self.predict() / mu
        return ((1 + alpha * mu**p)**2 + w * mu)

    def get_margeff(self, at='overall', method='dydx', atexog=None,
                    dummy=False, count=False):
        """Get marginal effects of the fitted model.

        Not yet implemented for Zero Inflated Models
        """
        raise NotImplementedError("not yet implemented for zero inflation")


class L1ZeroInflatedGeneralizedPoissonResults(
        L1CountResults, ZeroInflatedGeneralizedPoissonResults):
    pass


class ZeroInflatedGeneralizedPoissonResultsWrapper(
        lm.RegressionResultsWrapper):
    pass
wrap.populate_wrapper(ZeroInflatedGeneralizedPoissonResultsWrapper,
                      ZeroInflatedGeneralizedPoissonResults)


class L1ZeroInflatedGeneralizedPoissonResultsWrapper(
        lm.RegressionResultsWrapper):
    pass
wrap.populate_wrapper(L1ZeroInflatedGeneralizedPoissonResultsWrapper,
                      L1ZeroInflatedGeneralizedPoissonResults)


class ZeroInflatedNegativeBinomialResults(CountResults):
    __doc__ = _discrete_results_docs % {
        "one_line_description":
            "A results class for Zero Inflated Generalized Negative Binomial",
        "extra_attr": ""}

    @cache_readonly
    def _dispersion_factor(self):
        p = self.model.model_main.parameterization
        alpha = self.params[self.model.k_inflate:][-1]
        mu = np.exp(self.predict(which='linear'))
        w = 1 - self.predict() / mu
        return (1 + alpha * mu**(p - 1) + w * mu)

    def get_margeff(self, at='overall', method='dydx', atexog=None,
                    dummy=False, count=False):
        """Get marginal effects of the fitted model.

        Not yet implemented for Zero Inflated Models
        """
        raise NotImplementedError("not yet implemented for zero inflation")


class L1ZeroInflatedNegativeBinomialResults(
        L1CountResults, ZeroInflatedNegativeBinomialResults):
    pass


class ZeroInflatedNegativeBinomialResultsWrapper(
        lm.RegressionResultsWrapper):
    pass
wrap.populate_wrapper(ZeroInflatedNegativeBinomialResultsWrapper,
                      ZeroInflatedNegativeBinomialResults)


class L1ZeroInflatedNegativeBinomialResultsWrapper(
        lm.RegressionResultsWrapper):
    pass
wrap.populate_wrapper(L1ZeroInflatedNegativeBinomialResultsWrapper,
                      L1ZeroInflatedNegativeBinomialResults)