Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1''' 

2The one parameter exponential family distributions used by GLM. 

3''' 

4# TODO: quasi, quasibinomial, quasipoisson 

5# see 

6# http://www.biostat.jhsph.edu/~qli/biostatistics_r_doc/library/stats/html/family.html 

7# for comparison to R, and McCullagh and Nelder 

8 

9 

10import warnings 

11import inspect 

12import numpy as np 

13from scipy import special 

14from . import links as L 

15from . import varfuncs as V 

16 

17FLOAT_EPS = np.finfo(float).eps 

18 

19 

20class Family(object): 

21 """ 

22 The parent class for one-parameter exponential families. 

23 

24 Parameters 

25 ---------- 

26 link : a link function instance 

27 Link is the linear transformation function. 

28 See the individual families for available links. 

29 variance : a variance function 

30 Measures the variance as a function of the mean probabilities. 

31 See the individual families for the default variance function. 

32 

33 See Also 

34 -------- 

35 :ref:`links` : Further details on links. 

36 """ 

37 # TODO: change these class attributes, use valid somewhere... 

38 valid = [-np.inf, np.inf] 

39 links = [] 

40 

41 def _setlink(self, link): 

42 """ 

43 Helper method to set the link for a family. 

44 

45 Raises a ``ValueError`` exception if the link is not available. Note 

46 that the error message might not be that informative because it tells 

47 you that the link should be in the base class for the link function. 

48 

49 See statsmodels.genmod.generalized_linear_model.GLM for a list of 

50 appropriate links for each family but note that not all of these are 

51 currently available. 

52 """ 

53 # TODO: change the links class attribute in the families to hold 

54 # meaningful information instead of a list of links instances such as 

55 # [<statsmodels.family.links.Log object at 0x9a4240c>, 

56 # <statsmodels.family.links.Power object at 0x9a423ec>, 

57 # <statsmodels.family.links.Power object at 0x9a4236c>] 

58 # for Poisson... 

59 self._link = link 

60 if not isinstance(link, L.Link): 

61 raise TypeError("The input should be a valid Link object.") 

62 if hasattr(self, "links"): 

63 validlink = max([isinstance(link, _) for _ in self.links]) 

64 if not validlink: 

65 errmsg = "Invalid link for family, should be in %s. (got %s)" 

66 raise ValueError(errmsg % (repr(self.links), link)) 

67 

68 def _getlink(self): 

69 """ 

70 Helper method to get the link for a family. 

71 """ 

72 return self._link 

73 

74 # link property for each family is a pointer to link instance 

75 link = property(_getlink, _setlink, doc="Link function for family") 

76 

77 def __init__(self, link, variance): 

78 if inspect.isclass(link): 

79 warnmssg = "Calling Family(..) with a link class as argument " 

80 warnmssg += "is deprecated.\n" 

81 warnmssg += "Use an instance of a link class instead." 

82 lvl = 2 if type(self) is Family else 3 

83 warnings.warn(warnmssg, 

84 category=DeprecationWarning, stacklevel=lvl) 

85 self.link = link() 

86 else: 

87 self.link = link 

88 self.variance = variance 

89 

90 def starting_mu(self, y): 

91 r""" 

92 Starting value for mu in the IRLS algorithm. 

93 

94 Parameters 

95 ---------- 

96 y : ndarray 

97 The untransformed response variable. 

98 

99 Returns 

100 ------- 

101 mu_0 : ndarray 

102 The first guess on the transformed response variable. 

103 

104 Notes 

105 ----- 

106 .. math:: 

107 

108 \mu_0 = (Y + \overline{Y})/2 

109 

110 Only the Binomial family takes a different initial value. 

111 """ 

112 return (y + y.mean())/2. 

113 

114 def weights(self, mu): 

115 r""" 

116 Weights for IRLS steps 

117 

118 Parameters 

119 ---------- 

120 mu : array_like 

121 The transformed mean response variable in the exponential family 

122 

123 Returns 

124 ------- 

125 w : ndarray 

126 The weights for the IRLS steps 

127 

128 Notes 

129 ----- 

130 .. math:: 

131 

132 w = 1 / (g'(\mu)^2 * Var(\mu)) 

133 """ 

134 return 1. / (self.link.deriv(mu)**2 * self.variance(mu)) 

135 

136 def deviance(self, endog, mu, var_weights=1., freq_weights=1., scale=1.): 

137 r""" 

138 The deviance function evaluated at (endog, mu, var_weights, 

139 freq_weights, scale) for the distribution. 

140 

141 Deviance is usually defined as twice the loglikelihood ratio. 

142 

143 Parameters 

144 ---------- 

145 endog : array_like 

146 The endogenous response variable 

147 mu : array_like 

148 The inverse of the link function at the linear predicted values. 

149 var_weights : array_like 

150 1d array of variance (analytic) weights. The default is 1. 

151 freq_weights : array_like 

152 1d array of frequency weights. The default is 1. 

153 scale : float, optional 

154 An optional scale argument. The default is 1. 

155 

156 Returns 

157 ------- 

158 Deviance : ndarray 

159 The value of deviance function defined below. 

160 

161 Notes 

162 ----- 

163 Deviance is defined 

164 

165 .. math:: 

166 

167 D = 2\sum_i (freq\_weights_i * var\_weights * 

168 (llf(endog_i, endog_i) - llf(endog_i, \mu_i))) 

169 

170 where y is the endogenous variable. The deviance functions are 

171 analytically defined for each family. 

172 

173 Internally, we calculate deviance as: 

174 

175 .. math:: 

176 D = \sum_i freq\_weights_i * var\_weights * resid\_dev_i / scale 

177 """ 

178 resid_dev = self._resid_dev(endog, mu) 

179 return np.sum(resid_dev * freq_weights * var_weights / scale) 

180 

181 def resid_dev(self, endog, mu, var_weights=1., scale=1.): 

182 r""" 

183 The deviance residuals 

184 

185 Parameters 

186 ---------- 

187 endog : array_like 

188 The endogenous response variable 

189 mu : array_like 

190 The inverse of the link function at the linear predicted values. 

191 var_weights : array_like 

192 1d array of variance (analytic) weights. The default is 1. 

193 scale : float, optional 

194 An optional scale argument. The default is 1. 

195 

196 Returns 

197 ------- 

198 resid_dev : float 

199 Deviance residuals as defined below. 

200 

201 Notes 

202 ----- 

203 The deviance residuals are defined by the contribution D_i of 

204 observation i to the deviance as 

205 

206 .. math:: 

207 resid\_dev_i = sign(y_i-\mu_i) \sqrt{D_i} 

208 

209 D_i is calculated from the _resid_dev method in each family. 

210 Distribution-specific documentation of the calculation is available 

211 there. 

212 """ 

213 resid_dev = self._resid_dev(endog, mu) 

214 resid_dev *= var_weights / scale 

215 return np.sign(endog - mu) * np.sqrt(np.clip(resid_dev, 0., np.inf)) 

216 

217 def fitted(self, lin_pred): 

218 r""" 

219 Fitted values based on linear predictors lin_pred. 

220 

221 Parameters 

222 ---------- 

223 lin_pred : ndarray 

224 Values of the linear predictor of the model. 

225 :math:`X \cdot \beta` in a classical linear model. 

226 

227 Returns 

228 ------- 

229 mu : ndarray 

230 The mean response variables given by the inverse of the link 

231 function. 

232 """ 

233 fits = self.link.inverse(lin_pred) 

234 return fits 

235 

236 def predict(self, mu): 

237 """ 

238 Linear predictors based on given mu values. 

239 

240 Parameters 

241 ---------- 

242 mu : ndarray 

243 The mean response variables 

244 

245 Returns 

246 ------- 

247 lin_pred : ndarray 

248 Linear predictors based on the mean response variables. The value 

249 of the link function at the given mu. 

250 """ 

251 return self.link(mu) 

252 

253 def loglike_obs(self, endog, mu, var_weights=1., scale=1.): 

254 r""" 

255 The log-likelihood function for each observation in terms of the fitted 

256 mean response for the distribution. 

257 

258 Parameters 

259 ---------- 

260 endog : ndarray 

261 Usually the endogenous response variable. 

262 mu : ndarray 

263 Usually but not always the fitted mean response variable. 

264 var_weights : array_like 

265 1d array of variance (analytic) weights. The default is 1. 

266 scale : float 

267 The scale parameter. The default is 1. 

268 

269 Returns 

270 ------- 

271 ll_i : float 

272 The value of the loglikelihood evaluated at 

273 (endog, mu, var_weights, scale) as defined below. 

274 

275 Notes 

276 ----- 

277 This is defined for each family. endog and mu are not restricted to 

278 ``endog`` and ``mu`` respectively. For instance, you could call 

279 both ``loglike(endog, endog)`` and ``loglike(endog, mu)`` to get the 

280 log-likelihood ratio. 

281 """ 

282 raise NotImplementedError 

283 

284 def loglike(self, endog, mu, var_weights=1., freq_weights=1., scale=1.): 

285 r""" 

286 The log-likelihood function in terms of the fitted mean response. 

287 

288 Parameters 

289 ---------- 

290 endog : ndarray 

291 Usually the endogenous response variable. 

292 mu : ndarray 

293 Usually but not always the fitted mean response variable. 

294 var_weights : array_like 

295 1d array of variance (analytic) weights. The default is 1. 

296 freq_weights : array_like 

297 1d array of frequency weights. The default is 1. 

298 scale : float 

299 The scale parameter. The default is 1. 

300 

301 Returns 

302 ------- 

303 ll : float 

304 The value of the loglikelihood evaluated at 

305 (endog, mu, var_weights, freq_weights, scale) as defined below. 

306 

307 Notes 

308 ----- 

309 Where :math:`ll_i` is the by-observation log-likelihood: 

310 

311 .. math:: 

312 ll = \sum(ll_i * freq\_weights_i) 

313 

314 ``ll_i`` is defined for each family. endog and mu are not restricted 

315 to ``endog`` and ``mu`` respectively. For instance, you could call 

316 both ``loglike(endog, endog)`` and ``loglike(endog, mu)`` to get the 

317 log-likelihood ratio. 

318 """ 

319 ll_obs = self.loglike_obs(endog, mu, var_weights, scale) 

320 return np.sum(ll_obs * freq_weights) 

321 

322 def resid_anscombe(self, endog, mu, var_weights=1., scale=1.): 

323 r""" 

324 The Anscombe residuals 

325 

326 Parameters 

327 ---------- 

328 endog : ndarray 

329 The endogenous response variable 

330 mu : ndarray 

331 The inverse of the link function at the linear predicted values. 

332 var_weights : array_like 

333 1d array of variance (analytic) weights. The default is 1. 

334 scale : float, optional 

335 An optional argument to divide the residuals by sqrt(scale). 

336 The default is 1. 

337 

338 See Also 

339 -------- 

340 statsmodels.genmod.families.family.Family : `resid_anscombe` for the 

341 individual families for more information 

342 

343 Notes 

344 ----- 

345 Anscombe residuals are defined by 

346 

347 .. math:: 

348 resid\_anscombe_i = \frac{A(y)-A(\mu)}{A'(\mu)\sqrt{Var[\mu]}} * 

349 \sqrt(var\_weights) 

350 

351 where :math:`A'(y)=v(y)^{-\frac{1}{3}}` and :math:`v(\mu)` is the 

352 variance function :math:`Var[y]=\frac{\phi}{w}v(mu)`. 

353 The transformation :math:`A(y)` makes the residuals more normal 

354 distributed. 

355 """ 

356 raise NotImplementedError 

357 

358 def _clean(self, x): 

359 """ 

360 Helper function to trim the data so that it is in (0,inf) 

361 

362 Notes 

363 ----- 

364 The need for this function was discovered through usage and its 

365 possible that other families might need a check for validity of the 

366 domain. 

367 """ 

368 return np.clip(x, FLOAT_EPS, np.inf) 

369 

370 

371class Poisson(Family): 

372 """ 

373 Poisson exponential family. 

374 

375 Parameters 

376 ---------- 

377 link : a link instance, optional 

378 The default link for the Poisson family is the log link. Available 

379 links are log, identity, and sqrt. See statsmodels.families.links for 

380 more information. 

381 

382 Attributes 

383 ---------- 

384 Poisson.link : a link instance 

385 The link function of the Poisson instance. 

386 Poisson.variance : varfuncs instance 

387 ``variance`` is an instance of 

388 statsmodels.genmod.families.varfuncs.mu 

389 

390 See Also 

391 -------- 

392 statsmodels.genmod.families.family.Family : Parent class for all links. 

393 :ref:`links` : Further details on links. 

394 """ 

395 links = [L.log, L.identity, L.sqrt] 

396 variance = V.mu 

397 valid = [0, np.inf] 

398 safe_links = [L.Log, ] 

399 

400 def __init__(self, link=None): 

401 if link is None: 

402 link = L.log() 

403 super(Poisson, self).__init__(link=link, variance=Poisson.variance) 

404 

405 def _resid_dev(self, endog, mu): 

406 r""" 

407 Poisson deviance residuals 

408 

409 Parameters 

410 ---------- 

411 endog : ndarray 

412 The endogenous response variable. 

413 mu : ndarray 

414 The inverse of the link function at the linear predicted values. 

415 

416 Returns 

417 ------- 

418 resid_dev : float 

419 Deviance residuals as defined below. 

420 

421 Notes 

422 ----- 

423 .. math:: 

424 

425 resid\_dev_i = 2 * (endog_i * \ln(endog_i / \mu_i) - 

426 (endog_i - \mu_i)) 

427 """ 

428 endog_mu = self._clean(endog / mu) 

429 resid_dev = endog * np.log(endog_mu) - (endog - mu) 

430 return 2 * resid_dev 

431 

432 def loglike_obs(self, endog, mu, var_weights=1., scale=1.): 

433 r""" 

434 The log-likelihood function for each observation in terms of the fitted 

435 mean response for the Poisson distribution. 

436 

437 Parameters 

438 ---------- 

439 endog : ndarray 

440 Usually the endogenous response variable. 

441 mu : ndarray 

442 Usually but not always the fitted mean response variable. 

443 var_weights : array_like 

444 1d array of variance (analytic) weights. The default is 1. 

445 scale : float 

446 The scale parameter. The default is 1. 

447 

448 Returns 

449 ------- 

450 ll_i : float 

451 The value of the loglikelihood evaluated at 

452 (endog, mu, var_weights, scale) as defined below. 

453 

454 Notes 

455 ----- 

456 .. math:: 

457 ll_i = var\_weights_i / scale * (endog_i * \ln(\mu_i) - \mu_i - 

458 \ln \Gamma(endog_i + 1)) 

459 """ 

460 return var_weights / scale * (endog * np.log(mu) - mu - 

461 special.gammaln(endog + 1)) 

462 

463 def resid_anscombe(self, endog, mu, var_weights=1., scale=1.): 

464 r""" 

465 The Anscombe residuals 

466 

467 Parameters 

468 ---------- 

469 endog : ndarray 

470 The endogenous response variable 

471 mu : ndarray 

472 The inverse of the link function at the linear predicted values. 

473 var_weights : array_like 

474 1d array of variance (analytic) weights. The default is 1. 

475 scale : float, optional 

476 An optional argument to divide the residuals by sqrt(scale). 

477 The default is 1. 

478 

479 Returns 

480 ------- 

481 resid_anscombe : ndarray 

482 The Anscombe residuals for the Poisson family defined below 

483 

484 Notes 

485 ----- 

486 .. math:: 

487 

488 resid\_anscombe_i = (3/2) * (endog_i^{2/3} - \mu_i^{2/3}) / 

489 \mu_i^{1/6} * \sqrt(var\_weights) 

490 """ 

491 resid = ((3 / 2.) * (endog**(2 / 3.) - mu**(2 / 3.)) / 

492 (mu ** (1 / 6.) * scale ** 0.5)) 

493 resid *= np.sqrt(var_weights) 

494 return resid 

495 

496 

497class Gaussian(Family): 

498 """ 

499 Gaussian exponential family distribution. 

500 

501 Parameters 

502 ---------- 

503 link : a link instance, optional 

504 The default link for the Gaussian family is the identity link. 

505 Available links are log, identity, and inverse. 

506 See statsmodels.genmod.families.links for more information. 

507 

508 Attributes 

509 ---------- 

510 Gaussian.link : a link instance 

511 The link function of the Gaussian instance 

512 Gaussian.variance : varfunc instance 

513 ``variance`` is an instance of 

514 statsmodels.genmod.families.varfuncs.constant 

515 

516 See Also 

517 -------- 

518 statsmodels.genmod.families.family.Family : Parent class for all links. 

519 :ref:`links` : Further details on links. 

520 """ 

521 

522 links = [L.log, L.identity, L.inverse_power] 

523 variance = V.constant 

524 safe_links = links 

525 

526 def __init__(self, link=None): 

527 if link is None: 

528 link = L.identity() 

529 super(Gaussian, self).__init__(link=link, variance=Gaussian.variance) 

530 

531 def _resid_dev(self, endog, mu): 

532 r""" 

533 Gaussian deviance residuals 

534 

535 Parameters 

536 ---------- 

537 endog : ndarray 

538 The endogenous response variable. 

539 mu : ndarray 

540 The inverse of the link function at the linear predicted values. 

541 

542 Returns 

543 ------- 

544 resid_dev : float 

545 Deviance residuals as defined below. 

546 

547 Notes 

548 -------- 

549 .. math:: 

550 

551 resid\_dev_i = (endog_i - \mu_i) ** 2 

552 """ 

553 return (endog - mu) ** 2 

554 

555 def loglike_obs(self, endog, mu, var_weights=1., scale=1.): 

556 r""" 

557 The log-likelihood function for each observation in terms of the fitted 

558 mean response for the Gaussian distribution. 

559 

560 Parameters 

561 ---------- 

562 endog : ndarray 

563 Usually the endogenous response variable. 

564 mu : ndarray 

565 Usually but not always the fitted mean response variable. 

566 var_weights : array_like 

567 1d array of variance (analytic) weights. The default is 1. 

568 scale : float 

569 The scale parameter. The default is 1. 

570 

571 Returns 

572 ------- 

573 ll_i : float 

574 The value of the loglikelihood evaluated at 

575 (endog, mu, var_weights, scale) as defined below. 

576 

577 Notes 

578 ----- 

579 If the link is the identity link function then the 

580 loglikelihood function is the same as the classical OLS model. 

581 

582 .. math:: 

583 

584 llf = -nobs / 2 * (\log(SSR) + (1 + \log(2 \pi / nobs))) 

585 

586 where 

587 

588 .. math:: 

589 

590 SSR = \sum_i (Y_i - g^{-1}(\mu_i))^2 

591 

592 If the links is not the identity link then the loglikelihood 

593 function is defined as 

594 

595 .. math:: 

596 

597 ll_i = -1 / 2 \sum_i * var\_weights * ((Y_i - mu_i)^2 / scale + 

598 \log(2 * \pi * scale)) 

599 """ 

600 ll_obs = -var_weights * (endog - mu) ** 2 / scale 

601 ll_obs += -np.log(scale / var_weights) - np.log(2 * np.pi) 

602 ll_obs /= 2 

603 return ll_obs 

604 

605 def resid_anscombe(self, endog, mu, var_weights=1., scale=1.): 

606 r""" 

607 The Anscombe residuals 

608 

609 Parameters 

610 ---------- 

611 endog : ndarray 

612 The endogenous response variable 

613 mu : ndarray 

614 The inverse of the link function at the linear predicted values. 

615 var_weights : array_like 

616 1d array of variance (analytic) weights. The default is 1. 

617 scale : float, optional 

618 An optional argument to divide the residuals by sqrt(scale). 

619 The default is 1. 

620 

621 Returns 

622 ------- 

623 resid_anscombe : ndarray 

624 The Anscombe residuals for the Gaussian family defined below 

625 

626 Notes 

627 ----- 

628 For the Gaussian distribution, Anscombe residuals are the same as 

629 deviance residuals. 

630 

631 .. math:: 

632 

633 resid\_anscombe_i = (Y_i - \mu_i) / \sqrt{scale} * 

634 \sqrt(var\_weights) 

635 """ 

636 resid = (endog - mu) / scale ** 0.5 

637 resid *= np.sqrt(var_weights) 

638 return resid 

639 

640 

641class Gamma(Family): 

642 """ 

643 Gamma exponential family distribution. 

644 

645 Parameters 

646 ---------- 

647 link : a link instance, optional 

648 The default link for the Gamma family is the inverse link. 

649 Available links are log, identity, and inverse. 

650 See statsmodels.genmod.families.links for more information. 

651 

652 Attributes 

653 ---------- 

654 Gamma.link : a link instance 

655 The link function of the Gamma instance 

656 Gamma.variance : varfunc instance 

657 ``variance`` is an instance of 

658 statsmodels.genmod.family.varfuncs.mu_squared 

659 

660 See Also 

661 -------- 

662 statsmodels.genmod.families.family.Family : Parent class for all links. 

663 :ref:`links` : Further details on links. 

664 """ 

665 links = [L.log, L.identity, L.inverse_power] 

666 variance = V.mu_squared 

667 safe_links = [L.Log, ] 

668 

669 def __init__(self, link=None): 

670 if link is None: 

671 link = L.inverse_power() 

672 super(Gamma, self).__init__(link=link, variance=Gamma.variance) 

673 

674 def _resid_dev(self, endog, mu): 

675 r""" 

676 Gamma deviance residuals 

677 

678 Parameters 

679 ---------- 

680 endog : ndarray 

681 The endogenous response variable. 

682 mu : ndarray 

683 The inverse of the link function at the linear predicted values. 

684 

685 Returns 

686 ------- 

687 resid_dev : float 

688 Deviance residuals as defined below. 

689 

690 Notes 

691 ----- 

692 .. math:: 

693 

694 resid\_dev_i = 2 * ((endog_i - \mu_i) / \mu_i - 

695 \log(endog_i / \mu_i)) 

696 """ 

697 endog_mu = self._clean(endog / mu) 

698 resid_dev = -np.log(endog_mu) + (endog - mu) / mu 

699 return 2 * resid_dev 

700 

701 def loglike_obs(self, endog, mu, var_weights=1., scale=1.): 

702 r""" 

703 The log-likelihood function for each observation in terms of the fitted 

704 mean response for the Gamma distribution. 

705 

706 Parameters 

707 ---------- 

708 endog : ndarray 

709 Usually the endogenous response variable. 

710 mu : ndarray 

711 Usually but not always the fitted mean response variable. 

712 var_weights : array_like 

713 1d array of variance (analytic) weights. The default is 1. 

714 scale : float 

715 The scale parameter. The default is 1. 

716 

717 Returns 

718 ------- 

719 ll_i : float 

720 The value of the loglikelihood evaluated at 

721 (endog, mu, var_weights, scale) as defined below. 

722 

723 Notes 

724 ----- 

725 .. math:: 

726 

727 ll_i = var\_weights_i / scale * (\ln(var\_weights_i * endog_i / 

728 (scale * \mu_i)) - (var\_weights_i * endog_i) / 

729 (scale * \mu_i)) - \ln \Gamma(var\_weights_i / scale) - \ln(\mu_i) 

730 """ 

731 endog_mu = self._clean(endog / mu) 

732 weight_scale = var_weights / scale 

733 ll_obs = weight_scale * np.log(weight_scale * endog_mu) 

734 ll_obs -= weight_scale * endog_mu 

735 ll_obs -= special.gammaln(weight_scale) + np.log(endog) 

736 return ll_obs 

737 

738 # in Stata scale is set to equal 1 for reporting llf 

739 # in R it's the dispersion, though there is a loss of precision vs. 

740 # our results due to an assumed difference in implementation 

741 

742 def resid_anscombe(self, endog, mu, var_weights=1., scale=1.): 

743 r""" 

744 The Anscombe residuals 

745 

746 Parameters 

747 ---------- 

748 endog : ndarray 

749 The endogenous response variable 

750 mu : ndarray 

751 The inverse of the link function at the linear predicted values. 

752 var_weights : array_like 

753 1d array of variance (analytic) weights. The default is 1. 

754 scale : float, optional 

755 An optional argument to divide the residuals by sqrt(scale). 

756 The default is 1. 

757 

758 Returns 

759 ------- 

760 resid_anscombe : ndarray 

761 The Anscombe residuals for the Gamma family defined below 

762 

763 Notes 

764 ----- 

765 .. math:: 

766 

767 resid\_anscombe_i = 3 * (endog_i^{1/3} - \mu_i^{1/3}) / \mu_i^{1/3} 

768 / \sqrt{scale} * \sqrt(var\_weights) 

769 """ 

770 resid = 3 * (endog**(1/3.) - mu**(1/3.)) / mu**(1/3.) / scale ** 0.5 

771 resid *= np.sqrt(var_weights) 

772 return resid 

773 

774 

775class Binomial(Family): 

776 """ 

777 Binomial exponential family distribution. 

778 

779 Parameters 

780 ---------- 

781 link : a link instance, optional 

782 The default link for the Binomial family is the logit link. 

783 Available links are logit, probit, cauchy, log, and cloglog. 

784 See statsmodels.genmod.families.links for more information. 

785 

786 Attributes 

787 ---------- 

788 Binomial.link : a link instance 

789 The link function of the Binomial instance 

790 Binomial.variance : varfunc instance 

791 ``variance`` is an instance of 

792 statsmodels.genmod.families.varfuncs.binary 

793 

794 See Also 

795 -------- 

796 statsmodels.genmod.families.family.Family : Parent class for all links. 

797 :ref:`links` : Further details on links. 

798 

799 Notes 

800 ----- 

801 endog for Binomial can be specified in one of three ways: 

802 A 1d array of 0 or 1 values, indicating failure or success 

803 respectively. 

804 A 2d array, with two columns. The first column represents the 

805 success count and the second column represents the failure 

806 count. 

807 A 1d array of proportions, indicating the proportion of 

808 successes, with parameter `var_weights` containing the 

809 number of trials for each row. 

810 """ 

811 

812 links = [L.logit, L.probit, L.cauchy, L.log, L.cloglog, L.identity] 

813 variance = V.binary # this is not used below in an effort to include n 

814 

815 # Other safe links, e.g. cloglog and probit are subclasses 

816 safe_links = [L.Logit, L.CDFLink] 

817 

818 def __init__(self, link=None): # , n=1.): 

819 if link is None: 

820 link = L.logit() 

821 # TODO: it *should* work for a constant n>1 actually, if freq_weights 

822 # is equal to n 

823 self.n = 1 

824 # overwritten by initialize if needed but always used to initialize 

825 # variance since endog is assumed/forced to be (0,1) 

826 super(Binomial, self).__init__(link=link, 

827 variance=V.Binomial(n=self.n)) 

828 

829 def starting_mu(self, y): 

830 r""" 

831 The starting values for the IRLS algorithm for the Binomial family. 

832 A good choice for the binomial family is :math:`\mu_0 = (Y_i + 0.5)/2` 

833 """ 

834 return (y + .5)/2 

835 

836 def initialize(self, endog, freq_weights): 

837 ''' 

838 Initialize the response variable. 

839 

840 Parameters 

841 ---------- 

842 endog : ndarray 

843 Endogenous response variable 

844 freq_weights : ndarray 

845 1d array of frequency weights 

846 

847 Returns 

848 ------- 

849 If `endog` is binary, returns `endog` 

850 

851 If `endog` is a 2d array, then the input is assumed to be in the format 

852 (successes, failures) and 

853 successes/(success + failures) is returned. And n is set to 

854 successes + failures. 

855 ''' 

856 # if not np.all(np.asarray(freq_weights) == 1): 

857 # self.variance = V.Binomial(n=freq_weights) 

858 if endog.ndim > 1 and endog.shape[1] > 2: 

859 raise ValueError('endog has more than 2 columns. The Binomial ' 

860 'link supports either a single response variable ' 

861 'or a paired response variable.') 

862 elif endog.ndim > 1 and endog.shape[1] > 1: 

863 y = endog[:, 0] 

864 # overwrite self.freq_weights for deviance below 

865 self.n = endog.sum(1) 

866 return y*1./self.n, self.n 

867 else: 

868 return endog, np.ones(endog.shape[0]) 

869 

870 def _resid_dev(self, endog, mu): 

871 r""" 

872 Binomial deviance residuals 

873 

874 Parameters 

875 ---------- 

876 endog : ndarray 

877 The endogenous response variable. 

878 mu : ndarray 

879 The inverse of the link function at the linear predicted values. 

880 

881 Returns 

882 ------- 

883 resid_dev : float 

884 Deviance residuals as defined below. 

885 

886 Notes 

887 ----- 

888 .. math:: 

889 

890 resid\_dev_i = 2 * n * (endog_i * \ln(endog_i /\mu_i) + 

891 (1 - endog_i) * \ln((1 - endog_i) / (1 - \mu_i))) 

892 """ 

893 endog_mu = self._clean(endog / mu) 

894 n_endog_mu = self._clean((1. - endog) / (1. - mu)) 

895 resid_dev = endog * np.log(endog_mu) + (1 - endog) * np.log(n_endog_mu) 

896 return 2 * self.n * resid_dev 

897 

898 def loglike_obs(self, endog, mu, var_weights=1., scale=1.): 

899 r""" 

900 The log-likelihood function for each observation in terms of the fitted 

901 mean response for the Binomial distribution. 

902 

903 Parameters 

904 ---------- 

905 endog : ndarray 

906 Usually the endogenous response variable. 

907 mu : ndarray 

908 Usually but not always the fitted mean response variable. 

909 var_weights : array_like 

910 1d array of variance (analytic) weights. The default is 1. 

911 scale : float 

912 The scale parameter. The default is 1. 

913 

914 Returns 

915 ------- 

916 ll_i : float 

917 The value of the loglikelihood evaluated at 

918 (endog, mu, var_weights, scale) as defined below. 

919 

920 Notes 

921 ----- 

922 If the endogenous variable is binary: 

923 

924 .. math:: 

925 

926 ll_i = \sum_i (y_i * \log(\mu_i/(1-\mu_i)) + \log(1-\mu_i)) * 

927 var\_weights_i 

928 

929 If the endogenous variable is binomial: 

930 

931 .. math:: 

932 

933 ll_i = \sum_i var\_weights_i * (\ln \Gamma(n+1) - 

934 \ln \Gamma(y_i + 1) - \ln \Gamma(n_i - y_i +1) + y_i * 

935 \log(\mu_i / (n_i - \mu_i)) + n * \log(1 - \mu_i/n_i)) 

936 

937 where :math:`y_i = Y_i * n_i` with :math:`Y_i` and :math:`n_i` as 

938 defined in Binomial initialize. This simply makes :math:`y_i` the 

939 original number of successes. 

940 """ 

941 n = self.n # Number of trials 

942 y = endog * n # Number of successes 

943 

944 # note that mu is still in (0,1), i.e. not converted back 

945 return (special.gammaln(n + 1) - special.gammaln(y + 1) - 

946 special.gammaln(n - y + 1) + y * np.log(mu / (1 - mu)) + 

947 n * np.log(1 - mu)) * var_weights 

948 

949 def resid_anscombe(self, endog, mu, var_weights=1., scale=1.): 

950 r''' 

951 The Anscombe residuals 

952 

953 Parameters 

954 ---------- 

955 endog : ndarray 

956 The endogenous response variable 

957 mu : ndarray 

958 The inverse of the link function at the linear predicted values. 

959 var_weights : array_like 

960 1d array of variance (analytic) weights. The default is 1. 

961 scale : float, optional 

962 An optional argument to divide the residuals by sqrt(scale). 

963 The default is 1. 

964 

965 Returns 

966 ------- 

967 resid_anscombe : ndarray 

968 The Anscombe residuals as defined below. 

969 

970 Notes 

971 ----- 

972 .. math:: 

973 

974 n^{2/3}*(cox\_snell(endog)-cox\_snell(mu)) / 

975 (mu*(1-mu/n)*scale^3)^{1/6} * \sqrt(var\_weights) 

976 

977 where cox_snell is defined as 

978 cox_snell(x) = betainc(2/3., 2/3., x)*betainc(2/3.,2/3.) 

979 where betainc is the incomplete beta function as defined in scipy, 

980 which uses a regularized version (with the unregularized version, one 

981 would just have :math:`cox_snell(x) = Betainc(2/3., 2/3., x)`). 

982 

983 The name 'cox_snell' is idiosyncratic and is simply used for 

984 convenience following the approach suggested in Cox and Snell (1968). 

985 Further note that 

986 :math:`cox\_snell(x) = \frac{3}{2}*x^{2/3} * 

987 hyp2f1(2/3.,1/3.,5/3.,x)` 

988 where hyp2f1 is the hypergeometric 2f1 function. The Anscombe 

989 residuals are sometimes defined in the literature using the 

990 hyp2f1 formulation. Both betainc and hyp2f1 can be found in scipy. 

991 

992 References 

993 ---------- 

994 Anscombe, FJ. (1953) "Contribution to the discussion of H. Hotelling's 

995 paper." Journal of the Royal Statistical Society B. 15, 229-30. 

996 

997 Cox, DR and Snell, EJ. (1968) "A General Definition of Residuals." 

998 Journal of the Royal Statistical Society B. 30, 248-75. 

999 ''' 

1000 endog = endog * self.n # convert back to successes 

1001 mu = mu * self.n # convert back to successes 

1002 

1003 def cox_snell(x): 

1004 return special.betainc(2/3., 2/3., x) * special.beta(2/3., 2/3.) 

1005 

1006 resid = (self.n ** (2/3.) * (cox_snell(endog * 1. / self.n) - 

1007 cox_snell(mu * 1. / self.n)) / 

1008 (mu * (1 - mu * 1. / self.n) * scale ** 3) ** (1 / 6.)) 

1009 resid *= np.sqrt(var_weights) 

1010 return resid 

1011 

1012 

1013class InverseGaussian(Family): 

1014 """ 

1015 InverseGaussian exponential family. 

1016 

1017 Parameters 

1018 ---------- 

1019 link : a link instance, optional 

1020 The default link for the inverse Gaussian family is the 

1021 inverse squared link. 

1022 Available links are inverse_squared, inverse, log, and identity. 

1023 See statsmodels.genmod.families.links for more information. 

1024 

1025 Attributes 

1026 ---------- 

1027 InverseGaussian.link : a link instance 

1028 The link function of the inverse Gaussian instance 

1029 InverseGaussian.variance : varfunc instance 

1030 ``variance`` is an instance of 

1031 statsmodels.genmod.families.varfuncs.mu_cubed 

1032 

1033 See Also 

1034 -------- 

1035 statsmodels.genmod.families.family.Family : Parent class for all links. 

1036 :ref:`links` : Further details on links. 

1037 

1038 Notes 

1039 ----- 

1040 The inverse Gaussian distribution is sometimes referred to in the 

1041 literature as the Wald distribution. 

1042 """ 

1043 

1044 links = [L.inverse_squared, L.inverse_power, L.identity, L.log] 

1045 variance = V.mu_cubed 

1046 safe_links = [L.inverse_squared, L.Log, ] 

1047 

1048 def __init__(self, link=None): 

1049 if link is None: 

1050 link = L.inverse_squared() 

1051 super(InverseGaussian, self).__init__( 

1052 link=link, variance=InverseGaussian.variance) 

1053 

1054 def _resid_dev(self, endog, mu): 

1055 r""" 

1056 Inverse Gaussian deviance residuals 

1057 

1058 Parameters 

1059 ---------- 

1060 endog : ndarray 

1061 The endogenous response variable. 

1062 mu : ndarray 

1063 The inverse of the link function at the linear predicted values. 

1064 

1065 Returns 

1066 ------- 

1067 resid_dev : float 

1068 Deviance residuals as defined below. 

1069 

1070 Notes 

1071 ----- 

1072 .. math:: 

1073 

1074 resid\_dev_i = 1 / (endog_i * \mu_i^2) * (endog_i - \mu_i)^2 

1075 """ 

1076 return 1. / (endog * mu ** 2) * (endog - mu) ** 2 

1077 

1078 def loglike_obs(self, endog, mu, var_weights=1., scale=1.): 

1079 r""" 

1080 The log-likelihood function for each observation in terms of the fitted 

1081 mean response for the Inverse Gaussian distribution. 

1082 

1083 Parameters 

1084 ---------- 

1085 endog : ndarray 

1086 Usually the endogenous response variable. 

1087 mu : ndarray 

1088 Usually but not always the fitted mean response variable. 

1089 var_weights : array_like 

1090 1d array of variance (analytic) weights. The default is 1. 

1091 scale : float 

1092 The scale parameter. The default is 1. 

1093 

1094 Returns 

1095 ------- 

1096 ll_i : float 

1097 The value of the loglikelihood evaluated at 

1098 (endog, mu, var_weights, scale) as defined below. 

1099 

1100 Notes 

1101 ----- 

1102 .. math:: 

1103 

1104 ll_i = -1/2 * (var\_weights_i * (endog_i - \mu_i)^2 / 

1105 (scale * endog_i * \mu_i^2) + \ln(scale * \endog_i^3 / 

1106 var\_weights_i) - \ln(2 * \pi)) 

1107 """ 

1108 ll_obs = -var_weights * (endog - mu) ** 2 / (scale * endog * mu ** 2) 

1109 ll_obs += -np.log(scale * endog ** 3 / var_weights) - np.log(2 * np.pi) 

1110 ll_obs /= 2 

1111 return ll_obs 

1112 

1113 def resid_anscombe(self, endog, mu, var_weights=1., scale=1.): 

1114 r""" 

1115 The Anscombe residuals 

1116 

1117 Parameters 

1118 ---------- 

1119 endog : ndarray 

1120 The endogenous response variable 

1121 mu : ndarray 

1122 The inverse of the link function at the linear predicted values. 

1123 var_weights : array_like 

1124 1d array of variance (analytic) weights. The default is 1. 

1125 scale : float, optional 

1126 An optional argument to divide the residuals by sqrt(scale). 

1127 The default is 1. 

1128 

1129 Returns 

1130 ------- 

1131 resid_anscombe : ndarray 

1132 The Anscombe residuals for the inverse Gaussian distribution as 

1133 defined below 

1134 

1135 Notes 

1136 ----- 

1137 .. math:: 

1138 

1139 resid\_anscombe_i = \log(Y_i / \mu_i) / \sqrt{\mu_i * scale} * 

1140 \sqrt(var\_weights) 

1141 """ 

1142 resid = np.log(endog / mu) / np.sqrt(mu * scale) 

1143 resid *= np.sqrt(var_weights) 

1144 return resid 

1145 

1146 

1147class NegativeBinomial(Family): 

1148 r""" 

1149 Negative Binomial exponential family. 

1150 

1151 Parameters 

1152 ---------- 

1153 link : a link instance, optional 

1154 The default link for the negative binomial family is the log link. 

1155 Available links are log, cloglog, identity, nbinom and power. 

1156 See statsmodels.genmod.families.links for more information. 

1157 alpha : float, optional 

1158 The ancillary parameter for the negative binomial distribution. 

1159 For now ``alpha`` is assumed to be nonstochastic. The default value 

1160 is 1. Permissible values are usually assumed to be between .01 and 2. 

1161 

1162 Attributes 

1163 ---------- 

1164 NegativeBinomial.link : a link instance 

1165 The link function of the negative binomial instance 

1166 NegativeBinomial.variance : varfunc instance 

1167 ``variance`` is an instance of 

1168 statsmodels.genmod.families.varfuncs.nbinom 

1169 

1170 See Also 

1171 -------- 

1172 statsmodels.genmod.families.family.Family : Parent class for all links. 

1173 :ref:`links` : Further details on links. 

1174 

1175 Notes 

1176 ----- 

1177 Power link functions are not yet supported. 

1178 

1179 Parameterization for :math:`y=0, 1, 2, \ldots` is 

1180 

1181 .. math:: 

1182 

1183 f(y) = \frac{\Gamma(y+\frac{1}{\alpha})}{y!\Gamma(\frac{1}{\alpha})} 

1184 \left(\frac{1}{1+\alpha\mu}\right)^{\frac{1}{\alpha}} 

1185 \left(\frac{\alpha\mu}{1+\alpha\mu}\right)^y 

1186 

1187 with :math:`E[Y]=\mu\,` and :math:`Var[Y]=\mu+\alpha\mu^2`. 

1188 """ 

1189 links = [L.log, L.cloglog, L.identity, L.nbinom, L.Power] 

1190 # TODO: add the ability to use the power links with an if test 

1191 # similar to below 

1192 variance = V.nbinom 

1193 safe_links = [L.Log, ] 

1194 

1195 def __init__(self, link=None, alpha=1.): 

1196 self.alpha = 1. * alpha # make it at least float 

1197 if link is None: 

1198 link = L.log() 

1199 super(NegativeBinomial, self).__init__( 

1200 link=link, variance=V.NegativeBinomial(alpha=self.alpha)) 

1201 

1202 def _resid_dev(self, endog, mu): 

1203 r""" 

1204 Negative Binomial deviance residuals 

1205 

1206 Parameters 

1207 ---------- 

1208 endog : ndarray 

1209 The endogenous response variable. 

1210 mu : ndarray 

1211 The inverse of the link function at the linear predicted values. 

1212 

1213 Returns 

1214 ------- 

1215 resid_dev : float 

1216 Deviance residuals as defined below. 

1217 

1218 Notes 

1219 ----- 

1220 .. math:: 

1221 

1222 resid_dev_i = 2 * (endog_i * \ln(endog_i / 

1223 \mu_i) - (endog_i + 1 / \alpha) * \ln((endog_i + 1 / \alpha) / 

1224 (\mu_i + 1 / \alpha))) 

1225 """ 

1226 endog_mu = self._clean(endog / mu) 

1227 endog_alpha = endog + 1 / self.alpha 

1228 mu_alpha = mu + 1 / self.alpha 

1229 resid_dev = endog * np.log(endog_mu) 

1230 resid_dev -= endog_alpha * np.log(endog_alpha / mu_alpha) 

1231 return 2 * resid_dev 

1232 

1233 def loglike_obs(self, endog, mu, var_weights=1., scale=1.): 

1234 r""" 

1235 The log-likelihood function for each observation in terms of the fitted 

1236 mean response for the Negative Binomial distribution. 

1237 

1238 Parameters 

1239 ---------- 

1240 endog : ndarray 

1241 Usually the endogenous response variable. 

1242 mu : ndarray 

1243 Usually but not always the fitted mean response variable. 

1244 var_weights : array_like 

1245 1d array of variance (analytic) weights. The default is 1. 

1246 scale : float 

1247 The scale parameter. The default is 1. 

1248 

1249 Returns 

1250 ------- 

1251 ll_i : float 

1252 The value of the loglikelihood evaluated at 

1253 (endog, mu, var_weights, scale) as defined below. 

1254 

1255 Notes 

1256 ----- 

1257 Defined as: 

1258 

1259 .. math:: 

1260 

1261 llf = \sum_i var\_weights_i / scale * (Y_i * \log{(\alpha * \mu_i / 

1262 (1 + \alpha * \mu_i))} - \log{(1 + \alpha * \mu_i)}/ 

1263 \alpha + Constant) 

1264 

1265 where :math:`Constant` is defined as: 

1266 

1267 .. math:: 

1268 

1269 Constant = \ln \Gamma{(Y_i + 1/ \alpha )} - \ln \Gamma(Y_i + 1) - 

1270 \ln \Gamma{(1/ \alpha )} 

1271 

1272 constant = (special.gammaln(endog + 1 / self.alpha) - 

1273 special.gammaln(endog+1)-special.gammaln(1/self.alpha)) 

1274 return (endog * np.log(self.alpha * mu / (1 + self.alpha * mu)) - 

1275 np.log(1 + self.alpha * mu) / self.alpha + 

1276 constant) * var_weights / scale 

1277 """ 

1278 ll_obs = endog * np.log(self.alpha * mu) 

1279 ll_obs -= (endog + 1 / self.alpha) * np.log(1 + self.alpha * mu) 

1280 ll_obs += special.gammaln(endog + 1 / self.alpha) 

1281 ll_obs -= special.gammaln(1 / self.alpha) 

1282 ll_obs -= special.gammaln(endog + 1) 

1283 return var_weights / scale * ll_obs 

1284 

1285 def resid_anscombe(self, endog, mu, var_weights=1., scale=1.): 

1286 r""" 

1287 The Anscombe residuals 

1288 

1289 Parameters 

1290 ---------- 

1291 endog : ndarray 

1292 The endogenous response variable 

1293 mu : ndarray 

1294 The inverse of the link function at the linear predicted values. 

1295 var_weights : array_like 

1296 1d array of variance (analytic) weights. The default is 1. 

1297 scale : float, optional 

1298 An optional argument to divide the residuals by sqrt(scale). 

1299 The default is 1. 

1300 

1301 Returns 

1302 ------- 

1303 resid_anscombe : ndarray 

1304 The Anscombe residuals as defined below. 

1305 

1306 Notes 

1307 ----- 

1308 Anscombe residuals for Negative Binomial are the same as for Binomial 

1309 upon setting :math:`n=-\frac{1}{\alpha}`. Due to the negative value of 

1310 :math:`-\alpha*Y` the representation with the hypergeometric function 

1311 :math:`H2F1(x) = hyp2f1(2/3.,1/3.,5/3.,x)` is advantageous 

1312 

1313 .. math:: 

1314 

1315 resid\_anscombe_i = \frac{3}{2} * 

1316 (Y_i^(2/3)*H2F1(-\alpha*Y_i) - \mu_i^(2/3)*H2F1(-\alpha*\mu_i)) 

1317 / (\mu_i * (1+\alpha*\mu_i) * scale^3)^(1/6) * \sqrt(var\_weights) 

1318 

1319 Note that for the (unregularized) Beta function, one has 

1320 :math:`Beta(z,a,b) = z^a/a * H2F1(a,1-b,a+1,z)` 

1321 """ 

1322 def hyp2f1(x): 

1323 return special.hyp2f1(2 / 3., 1 / 3., 5 / 3., x) 

1324 

1325 resid = (3 / 2. * (endog ** (2 / 3.) * hyp2f1(-self.alpha * endog) - 

1326 mu ** (2 / 3.) * hyp2f1(-self.alpha * mu)) / 

1327 (mu * (1 + self.alpha * mu) * 

1328 scale ** 3) ** (1 / 6.)) 

1329 resid *= np.sqrt(var_weights) 

1330 return resid 

1331 

1332 

1333class Tweedie(Family): 

1334 """ 

1335 Tweedie family. 

1336 

1337 Parameters 

1338 ---------- 

1339 link : a link instance, optional 

1340 The default link for the Tweedie family is the log link. 

1341 Available links are log and Power. 

1342 See statsmodels.genmod.families.links for more information. 

1343 var_power : float, optional 

1344 The variance power. The default is 1. 

1345 eql : bool 

1346 If True, the Extended Quasi-Likelihood is used, else the 

1347 likelihood is used (however the latter is not implemented). 

1348 If eql is True, var_power must be between 1 and 2. 

1349 

1350 Attributes 

1351 ---------- 

1352 Tweedie.link : a link instance 

1353 The link function of the Tweedie instance 

1354 Tweedie.variance : varfunc instance 

1355 ``variance`` is an instance of 

1356 statsmodels.genmod.families.varfuncs.Power 

1357 Tweedie.var_power : float 

1358 The power of the variance function. 

1359 

1360 See Also 

1361 -------- 

1362 statsmodels.genmod.families.family.Family : Parent class for all links. 

1363 :ref:`links` : Further details on links. 

1364 

1365 Notes 

1366 ----- 

1367 Loglikelihood function not implemented because of the complexity of 

1368 calculating an infinite series of summations. The variance power can be 

1369 estimated using the ``estimate_tweedie_power`` function that is part of the 

1370 statsmodels.genmod.generalized_linear_model.GLM class. 

1371 """ 

1372 links = [L.log, L.Power] 

1373 variance = V.Power(power=1.5) 

1374 safe_links = [L.log, L.Power] 

1375 

1376 def __init__(self, link=None, var_power=1., eql=False): 

1377 self.var_power = var_power 

1378 self.eql = eql 

1379 if eql and (var_power < 1 or var_power > 2): 

1380 raise ValueError("Tweedie: if EQL=True then var_power must fall " 

1381 "between 1 and 2") 

1382 if link is None: 

1383 link = L.log() 

1384 super(Tweedie, self).__init__( 

1385 link=link, variance=V.Power(power=var_power * 1.)) 

1386 

1387 def _resid_dev(self, endog, mu): 

1388 r""" 

1389 Tweedie deviance residuals 

1390 

1391 Parameters 

1392 ---------- 

1393 endog : ndarray 

1394 The endogenous response variable. 

1395 mu : ndarray 

1396 The inverse of the link function at the linear predicted values. 

1397 

1398 Returns 

1399 ------- 

1400 resid_dev : float 

1401 Deviance residuals as defined below. 

1402 

1403 Notes 

1404 ----- 

1405 When :math:`p = 1`, 

1406 

1407 .. math:: 

1408 

1409 dev_i = \mu_i 

1410 

1411 when :math:`endog_i = 0` and 

1412 

1413 .. math:: 

1414 

1415 dev_i = endog_i * \log(endog_i / \mu_i) + (\mu_i - endog_i) 

1416 

1417 otherwise. 

1418 

1419 When :math:`p = 2`, 

1420 

1421 .. math:: 

1422 

1423 dev_i = (endog_i - \mu_i) / \mu_i - \log(endog_i / \mu_i) 

1424 

1425 For all other p, 

1426 

1427 .. math:: 

1428 

1429 dev_i = endog_i^{2 - p} / ((1 - p) * (2 - p)) - 

1430 endog_i * \mu_i^{1 - p} / (1 - p) + \mu_i^{2 - p} / 

1431 (2 - p) 

1432 

1433 The deviance residual is then 

1434 

1435 .. math:: 

1436 

1437 resid\_dev_i = 2 * dev_i 

1438 """ 

1439 p = self.var_power 

1440 if p == 1: 

1441 dev = np.where(endog == 0, 

1442 mu, 

1443 endog * np.log(endog / mu) + (mu - endog)) 

1444 elif p == 2: 

1445 endog1 = self._clean(endog) 

1446 dev = ((endog - mu) / mu) - np.log(endog1 / mu) 

1447 else: 

1448 dev = (endog ** (2 - p) / ((1 - p) * (2 - p)) - 

1449 endog * mu ** (1-p) / (1 - p) + mu ** (2 - p) / (2 - p)) 

1450 return 2 * dev 

1451 

1452 def loglike_obs(self, endog, mu, var_weights=1., scale=1.): 

1453 r""" 

1454 The log-likelihood function for each observation in terms of the fitted 

1455 mean response for the Tweedie distribution. 

1456 

1457 Parameters 

1458 ---------- 

1459 endog : ndarray 

1460 Usually the endogenous response variable. 

1461 mu : ndarray 

1462 Usually but not always the fitted mean response variable. 

1463 var_weights : array_like 

1464 1d array of variance (analytic) weights. The default is 1. 

1465 scale : float 

1466 The scale parameter. The default is 1. 

1467 

1468 Returns 

1469 ------- 

1470 ll_i : float 

1471 The value of the loglikelihood evaluated at 

1472 (endog, mu, var_weights, scale) as defined below. 

1473 

1474 Notes 

1475 ----- 

1476 If eql is True, the Extended Quasi-Likelihood is used. At present, 

1477 this method returns NaN if eql is False. When the actual likelihood 

1478 is implemented, it will be accessible by setting eql to False. 

1479 

1480 References 

1481 ---------- 

1482 JA Nelder, D Pregibon (1987). An extended quasi-likelihood function. 

1483 Biometrika 74:2, pp 221-232. https://www.jstor.org/stable/2336136 

1484 """ 

1485 if not self.eql: 

1486 # We have not yet implemented the actual likelihood 

1487 return np.nan 

1488 

1489 # Equations 9-10 or Nelder and Pregibon 

1490 p = self.var_power 

1491 llf = np.log(2 * np.pi * scale) + p * np.log(mu) - np.log(var_weights) 

1492 llf /= -2 

1493 

1494 if p == 1: 

1495 u = endog * np.log(endog / mu) - (endog - mu) 

1496 u *= var_weights / scale 

1497 elif p == 2: 

1498 yr = endog / mu 

1499 u = yr - np.log(yr) - 1 

1500 u *= var_weights / scale 

1501 else: 

1502 u = (endog ** (2 - p) 

1503 - (2 - p) * endog * mu ** (1 - p) 

1504 + (1 - p) * mu ** (2 - p)) 

1505 u *= var_weights / (scale * (1 - p) * (2 - p)) 

1506 llf -= u 

1507 

1508 return llf 

1509 

1510 def resid_anscombe(self, endog, mu, var_weights=1., scale=1.): 

1511 r""" 

1512 The Anscombe residuals 

1513 

1514 Parameters 

1515 ---------- 

1516 endog : ndarray 

1517 The endogenous response variable 

1518 mu : ndarray 

1519 The inverse of the link function at the linear predicted values. 

1520 var_weights : array_like 

1521 1d array of variance (analytic) weights. The default is 1. 

1522 scale : float, optional 

1523 An optional argument to divide the residuals by sqrt(scale). 

1524 The default is 1. 

1525 

1526 Returns 

1527 ------- 

1528 resid_anscombe : ndarray 

1529 The Anscombe residuals as defined below. 

1530 

1531 Notes 

1532 ----- 

1533 When :math:`p = 3`, then 

1534 

1535 .. math:: 

1536 

1537 resid\_anscombe_i = \log(endog_i / \mu_i) / \sqrt{\mu_i * scale} * 

1538 \sqrt(var\_weights) 

1539 

1540 Otherwise, 

1541 

1542 .. math:: 

1543 

1544 c = (3 - p) / 3 

1545 

1546 .. math:: 

1547 

1548 resid\_anscombe_i = (1 / c) * (endog_i^c - \mu_i^c) / \mu_i^{p / 6} 

1549 / \sqrt{scale} * \sqrt(var\_weights) 

1550 """ 

1551 if self.var_power == 3: 

1552 resid = np.log(endog / mu) / np.sqrt(mu * scale) 

1553 else: 

1554 c = (3. - self.var_power) / 3. 

1555 resid = ((1. / c) * (endog ** c - mu ** c) / 

1556 mu ** (self.var_power / 6.)) / scale ** 0.5 

1557 resid *= np.sqrt(var_weights) 

1558 return resid