1# -*- coding: utf-8 -*- 

2#pylint: disable-msg=W0142 

3"""Statistical power, solving for nobs, ... - trial version 

4 

5Created on Sat Jan 12 21:48:06 2013 

6 

7Author: Josef Perktold 

8 

9Example 

10roundtrip - root with respect to all variables 

11 

12 calculated, desired 

13nobs 33.367204205 33.367204205 

14effect 0.5 0.5 

15alpha 0.05 0.05 

16power 0.8 0.8 

17 

18 

19TODO: 

20refactoring 

21 - rename beta -> power, beta (type 2 error is beta = 1-power) DONE 

22 - I think the current implementation can handle any kind of extra keywords

23 (except that it may not raise meaningful exceptions)

24 - streamline code, I think internally classes can be merged 

25 how to extend to k-sample tests? 

26 user interface for different tests that map to the same (internal) test class 

27 - sequence of arguments might be inconsistent,

28 args and/or kwds so Python checks what's required and what can be None.

29 - templating for docstrings ? 

30 

31 

32""" 

33from statsmodels.compat.python import iteritems 

34import numpy as np 

35from scipy import stats, optimize 

36from statsmodels.tools.rootfinding import brentq_expanding 

37 

38def ttest_power(effect_size, nobs, alpha, df=None, alternative='two-sided'): 

39 '''Calculate the power of a t-test

40 ''' 

41 d = effect_size 

42 if df is None: 

43 df = nobs - 1 

44 

45 if alternative in ['two-sided', '2s']: 

46 alpha_ = alpha / 2. #no inplace changes, does not work 

47 elif alternative in ['smaller', 'larger']: 

48 alpha_ = alpha 

49 else: 

50 raise ValueError("alternative has to be 'two-sided', 'larger' " + 

51 "or 'smaller'") 

52 

53 pow_ = 0 

54 if alternative in ['two-sided', '2s', 'larger']: 

55 crit_upp = stats.t.isf(alpha_, df) 

56 #print crit_upp, df, d*np.sqrt(nobs) 

57 # use private methods, generic methods return nan with negative d 

58 if np.any(np.isnan(crit_upp)): 

59 # avoid endless loop, https://github.com/scipy/scipy/issues/2667 

60 pow_ = np.nan 

61 else: 

62 pow_ = stats.nct._sf(crit_upp, df, d*np.sqrt(nobs)) 

63 if alternative in ['two-sided', '2s', 'smaller']: 

64 crit_low = stats.t.ppf(alpha_, df) 

65 #print crit_low, df, d*np.sqrt(nobs) 

66 if np.any(np.isnan(crit_low)): 

67 pow_ = np.nan 

68 else: 

69 pow_ += stats.nct._cdf(crit_low, df, d*np.sqrt(nobs)) 

70 return pow_ 

71 
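# Illustrative usage sketch (added for exposition, not part of the original
# module); the input values below are arbitrary example numbers.
def _example_ttest_power():
    # two-sided one-sample t-test: effect size 0.5, 30 observations, alpha 0.05
    return ttest_power(0.5, nobs=30, alpha=0.05, alternative='two-sided')
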

72def normal_power(effect_size, nobs, alpha, alternative='two-sided', sigma=1.): 

73 '''Calculate the power of a normally distributed test statistic

74 

75 ''' 

76 d = effect_size 

77 

78 if alternative in ['two-sided', '2s']: 

79 alpha_ = alpha / 2. #no inplace changes, does not work 

80 elif alternative in ['smaller', 'larger']: 

81 alpha_ = alpha 

82 else: 

83 raise ValueError("alternative has to be 'two-sided', 'larger' " + 

84 "or 'smaller'") 

85 

86 pow_ = 0 

87 if alternative in ['two-sided', '2s', 'larger']: 

88 crit = stats.norm.isf(alpha_) 

89 pow_ = stats.norm.sf(crit - d*np.sqrt(nobs)/sigma) 

90 if alternative in ['two-sided', '2s', 'smaller']: 

91 crit = stats.norm.ppf(alpha_) 

92 pow_ += stats.norm.cdf(crit - d*np.sqrt(nobs)/sigma) 

93 return pow_ 

94 
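# Illustrative usage sketch (added for exposition): the z-test analogue of the
# example above; for the same inputs the normal approximation is close to,
# and typically slightly above, the t-based power.
def _example_normal_power():
    return normal_power(0.5, nobs=30, alpha=0.05, alternative='two-sided')
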

95def ftest_anova_power(effect_size, nobs, alpha, k_groups=2, df=None): 

96 '''Power of the F-test for one-way ANOVA with k equal-sized groups

97 

98 nobs total sample size, sum over all groups 

99 

100 should be general nobs observations, k_groups restrictions ??? 

101 ''' 

102 df_num = k_groups - 1

103 df_denom = nobs - k_groups

104 crit = stats.f.isf(alpha, df_num, df_denom)

105 pow_ = stats.ncf.sf(crit, df_num, df_denom, effect_size**2 * nobs)

106 return pow_#, crit 

107 
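# Illustrative usage sketch (added for exposition): one-way ANOVA power for an
# assumed effect size of 0.3, 3 equal-sized groups and 60 observations in
# total; the values are arbitrary example inputs.
def _example_ftest_anova_power():
    return ftest_anova_power(0.3, nobs=60, alpha=0.05, k_groups=3)
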

108def ftest_power(effect_size, df_num, df_denom, alpha, ncc=1): 

109 '''Calculate the power of a F-test. 

110 

111 Parameters 

112 ---------- 

113 effect_size : float 

114 standardized effect size, mean divided by the standard deviation. 

115 effect size has to be positive. 

116 df_num : int or float 

117 numerator degrees of freedom. 

118 df_denom : int or float 

119 denominator degrees of freedom. 

120 alpha : float in interval (0,1) 

121 significance level, e.g. 0.05, is the probability of a type I 

122 error, that is wrong rejections if the Null Hypothesis is true. 

123 ncc : int 

124 degrees of freedom correction for non-centrality parameter. 

125 see Notes 

126 

127 Returns 

128 ------- 

129 power : float 

130 Power of the test, e.g. 0.8, is one minus the probability of a 

131 type II error. Power is the probability that the test correctly 

132 rejects the Null Hypothesis if the Alternative Hypothesis is true. 

133 

134 Notes 

135 ----- 

136 

137 sample size is given implicitly by df_num 

138 

139 set ncc=0 to match t-test, or f-test in LikelihoodModelResults. 

140 ncc=1 matches the non-centrality parameter in R::pwr::pwr.f2.test 

141 

142 ftest_power with ncc=0 should also be correct for f_test in regression 

143 models, with df_num and df_denom as defined there. (not verified yet)

144 ''' 

145 nc = effect_size**2 * (df_denom + df_num + ncc) 

146 crit = stats.f.isf(alpha, df_denom, df_num) 

147 pow_ = stats.ncf.sf(crit, df_denom, df_num, nc) 

148 return pow_ #, crit, nc 

149 
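# Illustrative usage sketch (added for exposition): power of a generic F-test
# for assumed effect size and degrees of freedom; ncc=1 uses the
# non-centrality convention of R's pwr.f2.test, as described in the Notes.
def _example_ftest_power():
    return ftest_power(0.3, df_num=5, df_denom=50, alpha=0.05, ncc=1)
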

150 

151#class based implementation 

152#-------------------------- 

153 

154class Power(object): 

155 '''Statistical Power calculations, Base Class 

156 

157 so far this could all be class methods 

158 ''' 

159 

160 def __init__(self, **kwds): 

161 self.__dict__.update(kwds) 

162 # used only for instance level start values 

163 self.start_ttp = dict(effect_size=0.01, nobs=10., alpha=0.15, 

164 power=0.6, nobs1=10., ratio=1, 

165 df_num=10, df_denom=3 # for FTestPower 

166 ) 

167 # TODO: nobs1 and ratio are for ttest_ind, 

168 # need start_ttp for each test/class separately, 

169 # possible rootfinding problem for effect_size, starting small seems to 

170 # work 

171 from collections import defaultdict 

172 self.start_bqexp = defaultdict(dict) 

173 for key in ['nobs', 'nobs1', 'df_num', 'df_denom']: 

174 self.start_bqexp[key] = dict(low=2., start_upp=50.) 

175 for key in ['df_denom']: 

176 self.start_bqexp[key] = dict(low=1., start_upp=50.) 

177 for key in ['ratio']: 

178 self.start_bqexp[key] = dict(low=1e-8, start_upp=2) 

179 for key in ['alpha']: 

180 self.start_bqexp[key] = dict(low=1e-12, upp=1 - 1e-12) 

181 

182 def power(self, *args, **kwds): 

183 raise NotImplementedError 

184 

185 def _power_identity(self, *args, **kwds): 

186 power_ = kwds.pop('power') 

187 return self.power(*args, **kwds) - power_ 

188 

189 def solve_power(self, **kwds): 

190 '''solve for any one of the parameters of a t-test 

191 

192 for t-test the keywords are: 

193 effect_size, nobs, alpha, power 

194 

195 exactly one needs to be ``None``, all others need numeric values 

196 

197 *attaches* 

198 

199 cache_fit_res : list 

200 Cache of the result of the root finding procedure for the latest 

201 call to ``solve_power``, mainly for debugging purposes. 

202 The first element is the success indicator, one if successful. 

203 The remaining elements contain the return information of the up to 

204 three solvers that have been tried. 

205 

206 

207 ''' 

208 #TODO: maybe use explicit kwds, 

209 # nicer but requires inspect? and not generic across tests 

210 # I'm duplicating this in the subclass to get informative docstring 

211 key = [k for k,v in iteritems(kwds) if v is None] 

212 #print kwds, key 

213 if len(key) != 1: 

214 raise ValueError('need exactly one keyword that is None') 

215 key = key[0] 

216 

217 if key == 'power': 

218 del kwds['power'] 

219 return self.power(**kwds) 

220 

221 if kwds['effect_size'] == 0: 

222 import warnings 

223 from statsmodels.tools.sm_exceptions import HypothesisTestWarning 

224 warnings.warn('Effect size of 0 detected', HypothesisTestWarning)

225 if key == 'power':

226 return kwds['alpha']

227 elif key == 'alpha':

228 return kwds['power']

229 else:

230 raise ValueError('Cannot solve for %s when effect_size is 0.' % key)

231 

232 

233 self._counter = 0 

234 

235 def func(x): 

236 kwds[key] = x 

237 fval = self._power_identity(**kwds) 

238 self._counter += 1 

239 #print self._counter, 

240 if self._counter > 500: 

241 raise RuntimeError('possible endless loop (500 NaNs)') 

242 if np.isnan(fval): 

243 return np.inf 

244 else: 

245 return fval 

246 

247 #TODO: I'm using the following so I get a warning when start_ttp is not defined 

248 try: 

249 start_value = self.start_ttp[key] 

250 except KeyError: 

251 start_value = 0.9 

252 import warnings 

253 from statsmodels.tools.sm_exceptions import ValueWarning 

254 warnings.warn('Warning: using default start_value for {0}'.format(key), ValueWarning) 

255 

256 fit_kwds = self.start_bqexp[key] 

257 fit_res = [] 

258 #print vars() 

259 try: 

260 val, res = brentq_expanding(func, full_output=True, **fit_kwds) 

261 failed = False 

262 fit_res.append(res) 

263 except ValueError: 

264 failed = True 

265 fit_res.append(None) 

266 

267 success = None 

268 if (not failed) and res.converged: 

269 success = 1 

270 else: 

271 # try backup 

272 # TODO: check more cases to make this robust 

273 if not np.isnan(start_value): 

274 val, infodict, ier, msg = optimize.fsolve(func, start_value, 

275 full_output=True) #scalar 

276 #val = optimize.newton(func, start_value) #scalar 

277 fval = infodict['fvec'] 

278 fit_res.append(infodict) 

279 else: 

280 ier = -1 

281 fval = 1 

282 fit_res.append([None]) 

283 

284 if ier == 1 and np.abs(fval) < 1e-4 : 

285 success = 1 

286 else: 

287 #print infodict 

288 if key in ['alpha', 'power', 'effect_size']: 

289 val, r = optimize.brentq(func, 1e-8, 1-1e-8, 

290 full_output=True) #scalar 

291 success = 1 if r.converged else 0 

292 fit_res.append(r) 

293 else: 

294 success = 0 

295 

296 if not success == 1: 

297 import warnings 

298 from statsmodels.tools.sm_exceptions import (ConvergenceWarning, 

299 convergence_doc) 

300 warnings.warn(convergence_doc, ConvergenceWarning) 

301 

302 #attach fit_res, for reading only, should be needed only for debugging 

303 fit_res.insert(0, success) 

304 self.cache_fit_res = fit_res 

305 return val 

306 

307 def plot_power(self, dep_var='nobs', nobs=None, effect_size=None, 

308 alpha=0.05, ax=None, title=None, plt_kwds=None, **kwds): 

309 """ 

310 Plot power with number of observations or effect size on x-axis 

311 

312 Parameters 

313 ---------- 

314 dep_var : {'nobs', 'effect_size', 'alpha'} 

315 This specifies which variable is used for the horizontal axis. 

316 If dep_var='nobs' (default), then one curve is created for each 

317 value of ``effect_size``. If dep_var='effect_size' or alpha, then 

318 one curve is created for each value of ``nobs``. 

319 nobs : {scalar, array_like} 

320 specifies the values of the number of observations in the plot 

321 effect_size : {scalar, array_like} 

322 specifies the values of the effect_size in the plot 

323 alpha : {float, array_like} 

324 The significance level (type I error) used in the power 

325 calculation. Can be array_like only if ``dep_var='alpha'``

326 ax : None or axis instance 

327 If ax is None, then a matplotlib figure is created. If ax is a

328 matplotlib axis instance, then it is reused, and the plot elements 

329 are created with it. 

330 title : str 

331 title for the axis. Use an empty string, ``''``, to avoid a title. 

332 plt_kwds : {None, dict} 

333 not used yet 

334 kwds : dict 

335 These remaining keyword arguments are used as arguments to the 

336 power function. Many power functions support ``alternative`` as a

337 keyword argument; two-sample tests support ``ratio``.

338 

339 Returns 

340 ------- 

341 Figure 

342 If `ax` is None, the created figure. Otherwise the figure to which 

343 `ax` is connected. 

344 

345 Notes 

346 ----- 

347 This works only for classes where the ``power`` method has 

348 ``effect_size``, ``nobs`` and ``alpha`` as the first three arguments. 

349 If the second argument is ``nobs1``, then the number of observations 

350 in the plot are those for the first sample. 

351 TODO: fix this for FTestPower and GofChisquarePower 

352 

353 TODO: maybe add line variable, if we want more than nobs and effectsize 

354 """ 

355 #if pwr_kwds is None: 

356 # pwr_kwds = {} 

357 from statsmodels.graphics import utils 

358 from statsmodels.graphics.plottools import rainbow 

359 fig, ax = utils.create_mpl_ax(ax) 

360 import matplotlib.pyplot as plt 

361 colormap = plt.cm.Dark2 #pylint: disable-msg=E1101 

362 plt_alpha = 1 #0.75 

363 lw = 2 

364 if dep_var == 'nobs': 

365 colors = rainbow(len(effect_size)) 

366 colors = [colormap(i) for i in np.linspace(0, 0.9, len(effect_size))] 

367 for ii, es in enumerate(effect_size): 

368 power = self.power(es, nobs, alpha, **kwds) 

369 ax.plot(nobs, power, lw=lw, alpha=plt_alpha, 

370 color=colors[ii], label='es=%4.2F' % es) 

371 xlabel = 'Number of Observations' 

372 elif dep_var in ['effect size', 'effect_size', 'es']: 

373 colors = rainbow(len(nobs)) 

374 colors = [colormap(i) for i in np.linspace(0, 0.9, len(nobs))] 

375 for ii, n in enumerate(nobs): 

376 power = self.power(effect_size, n, alpha, **kwds) 

377 ax.plot(effect_size, power, lw=lw, alpha=plt_alpha, 

378 color=colors[ii], label='N=%4.2F' % n) 

379 xlabel = 'Effect Size' 

380 elif dep_var in ['alpha']: 

381 # experimental nobs as defining separate lines 

382 colors = rainbow(len(nobs)) 

383 

384 for ii, n in enumerate(nobs): 

385 power = self.power(effect_size, n, alpha, **kwds) 

386 ax.plot(alpha, power, lw=lw, alpha=plt_alpha, 

387 color=colors[ii], label='N=%4.2F' % n) 

388 xlabel = 'alpha' 

389 else: 

390 raise ValueError('dep_var not implemented')

391 

392 if title is None: 

393 title = 'Power of Test' 

394 ax.set_xlabel(xlabel) 

395 ax.set_title(title) 

396 ax.legend(loc='lower right') 

397 return fig 

398 

399 

400class TTestPower(Power): 

401 '''Statistical Power calculations for one sample or paired sample t-test 

402 

403 ''' 

404 

405 def power(self, effect_size, nobs, alpha, df=None, alternative='two-sided'): 

406 '''Calculate the power of a t-test for one sample or paired samples. 

407 

408 Parameters 

409 ---------- 

410 effect_size : float 

411 standardized effect size, mean divided by the standard deviation. 

412 effect size has to be positive. 

413 nobs : int or float 

414 sample size, number of observations. 

415 alpha : float in interval (0,1) 

416 significance level, e.g. 0.05, is the probability of a type I 

417 error, that is wrong rejections if the Null Hypothesis is true. 

418 df : int or float 

419 degrees of freedom. By default this is None, and the df from the 

420 one sample or paired ttest is used, ``df = nobs1 - 1`` 

421 alternative : str, 'two-sided' (default), 'larger', 'smaller' 

422 extra argument to choose whether the power is calculated for a 

423 two-sided (default) or one sided test. The one-sided test can be 

424 either 'larger', 'smaller'. 

425

426 

427 Returns 

428 ------- 

429 power : float 

430 Power of the test, e.g. 0.8, is one minus the probability of a 

431 type II error. Power is the probability that the test correctly 

432 rejects the Null Hypothesis if the Alternative Hypothesis is true. 

433 

434 ''' 

435 # for debugging 

436 #print 'calling ttest power with', (effect_size, nobs, alpha, df, alternative) 

437 return ttest_power(effect_size, nobs, alpha, df=df, 

438 alternative=alternative) 

439 

440 #method is only added to have explicit keywords and docstring 

441 def solve_power(self, effect_size=None, nobs=None, alpha=None, power=None, 

442 alternative='two-sided'): 

443 '''solve for any one parameter of the power of a one sample t-test 

444 

445 for the one sample t-test the keywords are: 

446 effect_size, nobs, alpha, power 

447 

448 Exactly one needs to be ``None``, all others need numeric values. 

449 

450 This test can also be used for a paired t-test, where effect size is 

451 defined in terms of the mean difference, and nobs is the number of 

452 pairs. 

453 

454 Parameters 

455 ---------- 

456 effect_size : float 

457 standardized effect size, mean divided by the standard deviation. 

458 effect size has to be positive. 

459 nobs : int or float 

460 sample size, number of observations. 

461 alpha : float in interval (0,1) 

462 significance level, e.g. 0.05, is the probability of a type I 

463 error, that is wrong rejections if the Null Hypothesis is true. 

464 power : float in interval (0,1) 

465 power of the test, e.g. 0.8, is one minus the probability of a 

466 type II error. Power is the probability that the test correctly 

467 rejects the Null Hypothesis if the Alternative Hypothesis is true. 

468 alternative : str, 'two-sided' (default), 'larger', 'smaller'

469 extra argument to choose whether the power is calculated for a

470 two-sided (default) or one sided test. The one-sided test can be

471 either 'larger' or 'smaller'.

472 

473 Returns 

474 ------- 

475 value : float 

476 The value of the parameter that was set to None in the call. The 

477 value solves the power equation given the remaining parameters. 

478 

479 *attaches* 

480 

481 cache_fit_res : list 

482 Cache of the result of the root finding procedure for the latest 

483 call to ``solve_power``, mainly for debugging purposes. 

484 The first element is the success indicator, one if successful. 

485 The remaining elements contain the return information of the up to 

486 three solvers that have been tried. 

487 

488 Notes 

489 ----- 

490 The function uses scipy.optimize for finding the value that satisfies 

491 the power equation. It first uses ``brentq`` with a prior search for 

492 bounds. If this fails to find a root, ``fsolve`` is used. If ``fsolve`` 

493 also fails, then, for ``alpha``, ``power`` and ``effect_size``, 

494 ``brentq`` with fixed bounds is used. However, there can still be cases 

495 where this fails. 

496 

497 ''' 

498 # for debugging 

499 #print 'calling ttest solve with', (effect_size, nobs, alpha, power, alternative) 

500 return super(TTestPower, self).solve_power(effect_size=effect_size, 

501 nobs=nobs, 

502 alpha=alpha, 

503 power=power, 

504 alternative=alternative) 

505 
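# Illustrative usage sketch (added for exposition): solve the one-sample t-test
# for the sample size that gives 80% power at alpha 0.05 and effect size 0.5;
# this reproduces the roundtrip value quoted in the module docstring (about 33.37).
def _example_ttest_solve_nobs():
    return TTestPower().solve_power(effect_size=0.5, nobs=None, alpha=0.05,
                                    power=0.8, alternative='two-sided')
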

506class TTestIndPower(Power): 

507 '''Statistical Power calculations for t-test for two independent samples

508 

509 currently only uses pooled variance 

510 

511 ''' 

512 

513 

514 def power(self, effect_size, nobs1, alpha, ratio=1, df=None, 

515 alternative='two-sided'): 

516 '''Calculate the power of a t-test for two independent samples

517 

518 Parameters 

519 ---------- 

520 effect_size : float 

521 standardized effect size, difference between the two means divided 

522 by the standard deviation. `effect_size` has to be positive. 

523 nobs1 : int or float 

524 number of observations of sample 1. The number of observations of 

525 sample two is ratio times the size of sample 1, 

526 i.e. ``nobs2 = nobs1 * ratio`` 

527 alpha : float in interval (0,1) 

528 significance level, e.g. 0.05, is the probability of a type I 

529 error, that is wrong rejections if the Null Hypothesis is true. 

530 ratio : float 

531 ratio of the number of observations in sample 2 relative to 

532 sample 1. see description of nobs1 

533 The default for ratio is 1; to solve for ratio given the other 

534 arguments, it has to be explicitly set to None. 

535 df : int or float 

536 degrees of freedom. By default this is None, and the df from the 

537 ttest with pooled variance is used, ``df = (nobs1 - 1 + nobs2 - 1)`` 

538 alternative : str, 'two-sided' (default), 'larger', 'smaller' 

539 extra argument to choose whether the power is calculated for a 

540 two-sided (default) or one sided test. The one-sided test can be 

541 either 'larger', 'smaller'. 

542 

543 Returns 

544 ------- 

545 power : float 

546 Power of the test, e.g. 0.8, is one minus the probability of a 

547 type II error. Power is the probability that the test correctly 

548 rejects the Null Hypothesis if the Alternative Hypothesis is true. 

549 

550 ''' 

551 

552 nobs2 = nobs1*ratio 

553 #pooled variance 

554 if df is None: 

555 df = (nobs1 - 1 + nobs2 - 1) 

556 

557 nobs = 1./ (1. / nobs1 + 1. / nobs2) 

558 #print 'calling ttest power with', (effect_size, nobs, alpha, df, alternative) 

559 return ttest_power(effect_size, nobs, alpha, df=df, alternative=alternative) 

560 

561 #method is only added to have explicit keywords and docstring 

562 def solve_power(self, effect_size=None, nobs1=None, alpha=None, power=None, 

563 ratio=1., alternative='two-sided'): 

564 '''solve for any one parameter of the power of a two sample t-test 

565 

566 for t-test the keywords are: 

567 effect_size, nobs1, alpha, power, ratio 

568 

569 exactly one needs to be ``None``, all others need numeric values 

570 

571 Parameters 

572 ---------- 

573 effect_size : float 

574 standardized effect size, difference between the two means divided 

575 by the standard deviation. `effect_size` has to be positive. 

576 nobs1 : int or float 

577 number of observations of sample 1. The number of observations of 

578 sample two is ratio times the size of sample 1, 

579 i.e. ``nobs2 = nobs1 * ratio`` 

580 alpha : float in interval (0,1) 

581 significance level, e.g. 0.05, is the probability of a type I 

582 error, that is wrong rejections if the Null Hypothesis is true. 

583 power : float in interval (0,1) 

584 power of the test, e.g. 0.8, is one minus the probability of a 

585 type II error. Power is the probability that the test correctly 

586 rejects the Null Hypothesis if the Alternative Hypothesis is true. 

587 ratio : float 

588 ratio of the number of observations in sample 2 relative to 

589 sample 1. see description of nobs1 

590 The default for ratio is 1; to solve for ratio given the other 

591 arguments it has to be explicitly set to None. 

592 alternative : str, 'two-sided' (default), 'larger', 'smaller' 

593 extra argument to choose whether the power is calculated for a 

594 two-sided (default) or one sided test. The one-sided test can be 

595 either 'larger', 'smaller'. 

596 

597 Returns 

598 ------- 

599 value : float 

600 The value of the parameter that was set to None in the call. The 

601 value solves the power equation given the remaining parameters. 

602 

603 

604 Notes 

605 ----- 

606 The function uses scipy.optimize for finding the value that satisfies 

607 the power equation. It first uses ``brentq`` with a prior search for 

608 bounds. If this fails to find a root, ``fsolve`` is used. If ``fsolve`` 

609 also fails, then, for ``alpha``, ``power`` and ``effect_size``, 

610 ``brentq`` with fixed bounds is used. However, there can still be cases 

611 where this fails. 

612 

613 ''' 

614 return super(TTestIndPower, self).solve_power(effect_size=effect_size, 

615 nobs1=nobs1, 

616 alpha=alpha, 

617 power=power, 

618 ratio=ratio, 

619 alternative=alternative) 

620 
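# Illustrative usage sketch (added for exposition): the two independent sample
# analogue; with ratio=1 both groups have nobs1 observations, so solving for
# nobs1 gives the required size of each group.
def _example_ttest_ind_solve_nobs1():
    return TTestIndPower().solve_power(effect_size=0.5, nobs1=None, alpha=0.05,
                                       power=0.8, ratio=1.,
                                       alternative='two-sided')
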

621class NormalIndPower(Power): 

622 '''Statistical Power calculations for z-test for two independent samples. 

623 

624 currently only uses pooled variance 

625 

626 ''' 

627 

628 def __init__(self, ddof=0, **kwds): 

629 self.ddof = ddof 

630 super(NormalIndPower, self).__init__(**kwds) 

631 

632 def power(self, effect_size, nobs1, alpha, ratio=1, 

633 alternative='two-sided'): 

634 '''Calculate the power of a z-test for two independent samples

635 

636 Parameters 

637 ---------- 

638 effect_size : float 

639 standardized effect size, difference between the two means divided 

640 by the standard deviation. effect size has to be positive. 

641 nobs1 : int or float 

642 number of observations of sample 1. The number of observations of 

643 sample two is ratio times the size of sample 1, 

644 i.e. ``nobs2 = nobs1 * ratio`` 

645 ``ratio`` can be set to zero in order to get the power for a 

646 one sample test. 

647 alpha : float in interval (0,1) 

648 significance level, e.g. 0.05, is the probability of a type I 

649 error, that is wrong rejections if the Null Hypothesis is true. 

650 ratio : float 

651 ratio of the number of observations in sample 2 relative to 

652 sample 1. see description of nobs1 

653 alternative : str, 'two-sided' (default), 'larger', 'smaller' 

654 extra argument to choose whether the power is calculated for a 

655 two-sided (default) or one sided test. The one-sided test can be 

656 either 'larger', 'smaller'. 

657 

658 Returns 

659 ------- 

660 power : float 

661 Power of the test, e.g. 0.8, is one minus the probability of a 

662 type II error. Power is the probability that the test correctly 

663 rejects the Null Hypothesis if the Alternative Hypothesis is true. 

664 

665 ''' 

666 

667 ddof = self.ddof # for correlation, ddof=3 

668 

669 # get effective nobs, factor for std of test statistic 

670 if ratio > 0: 

671 nobs2 = nobs1*ratio 

672 #equivalent to nobs = n1*n2/(n1+n2)=n1*ratio/(1+ratio) 

673 nobs = 1./ (1. / (nobs1 - ddof) + 1. / (nobs2 - ddof)) 

674 else: 

675 nobs = nobs1 - ddof 

676 return normal_power(effect_size, nobs, alpha, alternative=alternative) 

677 

678 #method is only added to have explicit keywords and docstring 

679 def solve_power(self, effect_size=None, nobs1=None, alpha=None, power=None, 

680 ratio=1., alternative='two-sided'): 

681 '''solve for any one parameter of the power of a two sample z-test 

682 

683 for z-test the keywords are: 

684 effect_size, nobs1, alpha, power, ratio 

685 

686 exactly one needs to be ``None``, all others need numeric values 

687 

688 Parameters 

689 ---------- 

690 effect_size : float 

691 standardized effect size, difference between the two means divided 

692 by the standard deviation. 

693 If ratio=0, then this is the standardized mean in the one sample 

694 test. 

695 nobs1 : int or float 

696 number of observations of sample 1. The number of observations of 

697 sample two is ratio times the size of sample 1, 

698 i.e. ``nobs2 = nobs1 * ratio`` 

699 ``ratio`` can be set to zero in order to get the power for a 

700 one sample test. 

701 alpha : float in interval (0,1) 

702 significance level, e.g. 0.05, is the probability of a type I 

703 error, that is wrong rejections if the Null Hypothesis is true. 

704 power : float in interval (0,1) 

705 power of the test, e.g. 0.8, is one minus the probability of a 

706 type II error. Power is the probability that the test correctly 

707 rejects the Null Hypothesis if the Alternative Hypothesis is true. 

708 ratio : float 

709 ratio of the number of observations in sample 2 relative to 

710 sample 1. see description of nobs1 

711 The default for ratio is 1; to solve for ratio given the other

712 arguments it has to be explicitly set to None. 

713 alternative : str, 'two-sided' (default), 'larger', 'smaller' 

714 extra argument to choose whether the power is calculated for a 

715 two-sided (default) or one sided test. The one-sided test can be 

716 either 'larger', 'smaller'. 

717 

718 Returns 

719 ------- 

720 value : float 

721 The value of the parameter that was set to None in the call. The 

722 value solves the power equation given the remaining parameters. 

723 

724 

725 Notes 

726 ----- 

727 The function uses scipy.optimize for finding the value that satisfies 

728 the power equation. It first uses ``brentq`` with a prior search for 

729 bounds. If this fails to find a root, ``fsolve`` is used. If ``fsolve`` 

730 also fails, then, for ``alpha``, ``power`` and ``effect_size``, 

731 ``brentq`` with fixed bounds is used. However, there can still be cases 

732 where this fails. 

733 

734 ''' 

735 return super(NormalIndPower, self).solve_power(effect_size=effect_size, 

736 nobs1=nobs1, 

737 alpha=alpha, 

738 power=power, 

739 ratio=ratio, 

740 alternative=alternative) 

741 
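# Illustrative usage sketch (added for exposition): z-test version of the two
# sample calculation; setting ratio=0 would instead give the one sample case,
# as described in the ``power`` docstring above.
def _example_normal_ind_solve_nobs1():
    return NormalIndPower().solve_power(effect_size=0.5, nobs1=None,
                                        alpha=0.05, power=0.8, ratio=1.,
                                        alternative='two-sided')
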

742 

743class FTestPower(Power): 

744 '''Statistical Power calculations for generic F-test 

745 

746 ''' 

747 

748 def power(self, effect_size, df_num, df_denom, alpha, ncc=1): 

749 '''Calculate the power of a F-test. 

750 

751 Parameters 

752 ---------- 

753 effect_size : float 

754 standardized effect size, mean divided by the standard deviation. 

755 effect size has to be positive. 

756 df_num : int or float 

757 numerator degrees of freedom. 

758 df_denom : int or float 

759 denominator degrees of freedom. 

760 alpha : float in interval (0,1) 

761 significance level, e.g. 0.05, is the probability of a type I 

762 error, that is wrong rejections if the Null Hypothesis is true. 

763 ncc : int 

764 degrees of freedom correction for non-centrality parameter. 

765 see Notes 

766 

767 Returns 

768 ------- 

769 power : float 

770 Power of the test, e.g. 0.8, is one minus the probability of a 

771 type II error. Power is the probability that the test correctly 

772 rejects the Null Hypothesis if the Alternative Hypothesis is true. 

773 

774 Notes 

775 ----- 

776 

777 sample size is given implicitly by df_num 

778 

779 set ncc=0 to match t-test, or f-test in LikelihoodModelResults. 

780 ncc=1 matches the non-centrality parameter in R::pwr::pwr.f2.test 

781 

782 ftest_power with ncc=0 should also be correct for f_test in regression 

783 models, with df_num and df_denom as defined there. (not verified yet)

784 ''' 

785 

786 pow_ = ftest_power(effect_size, df_num, df_denom, alpha, ncc=ncc) 

787 #print effect_size, df_num, df_denom, alpha, pow_ 

788 return pow_ 

789 

790 #method is only added to have explicit keywords and docstring 

791 def solve_power(self, effect_size=None, df_num=None, df_denom=None, 

792 nobs=None, alpha=None, power=None, ncc=1): 

793 '''solve for any one parameter of the power of a F-test 

794 

795 for the one sample F-test the keywords are: 

796 effect_size, df_num, df_denom, alpha, power 

797 

798 Exactly one needs to be ``None``, all others need numeric values. 

799 

800 

801 Parameters 

802 ---------- 

803 effect_size : float 

804 standardized effect size, mean divided by the standard deviation. 

805 effect size has to be positive. 

806 df_num : int or float

807 numerator degrees of freedom.

808 df_denom : int or float

809 denominator degrees of freedom.

810 alpha : float in interval (0,1)

811 significance level, e.g. 0.05, is the probability of a type I

812 error, that is wrong rejections if the Null Hypothesis is true.

813 power : float in interval (0,1)

814 power of the test, e.g. 0.8, is one minus the probability of a

815 type II error. Power is the probability that the test correctly

816 rejects the Null Hypothesis if the Alternative Hypothesis is true.

817 ncc : int

818 degrees of freedom correction for non-centrality parameter, see ``power``

819 

820 Returns 

821 ------- 

822 value : float 

823 The value of the parameter that was set to None in the call. The 

824 value solves the power equation given the remaining parameters. 

825 

826 

827 Notes 

828 ----- 

829 The function uses scipy.optimize for finding the value that satisfies 

830 the power equation. It first uses ``brentq`` with a prior search for 

831 bounds. If this fails to find a root, ``fsolve`` is used. If ``fsolve`` 

832 also fails, then, for ``alpha``, ``power`` and ``effect_size``, 

833 ``brentq`` with fixed bounds is used. However, there can still be cases 

834 where this fails. 

835 

836 ''' 

837 return super(FTestPower, self).solve_power(effect_size=effect_size, 

838 df_num=df_num, 

839 df_denom=df_denom, 

840 alpha=alpha, 

841 power=power, 

842 ncc=ncc) 

843 
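# Illustrative usage sketch (added for exposition): solve the generic F-test
# for power given assumed effect size and degrees of freedom; exactly one
# keyword is left as None, as required by ``solve_power``.
def _example_ftest_solve_power():
    return FTestPower().solve_power(effect_size=0.3, df_num=5, df_denom=50,
                                    alpha=0.05, power=None, ncc=1)
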

844class FTestAnovaPower(Power): 

845 '''Statistical Power calculations for F-test of one factor balanced ANOVA

846 

847 ''' 

848 

849 def power(self, effect_size, nobs, alpha, k_groups=2): 

850 '''Calculate the power of a F-test for one factor ANOVA. 

851 

852 Parameters 

853 ---------- 

854 effect_size : float 

855 standardized effect size, mean divided by the standard deviation. 

856 effect size has to be positive. 

857 nobs : int or float 

858 sample size, number of observations. 

859 alpha : float in interval (0,1) 

860 significance level, e.g. 0.05, is the probability of a type I 

861 error, that is wrong rejections if the Null Hypothesis is true. 

862 k_groups : int or float 

863 number of groups in the ANOVA or k-sample comparison. Default is 2. 

864 

865 Returns 

866 ------- 

867 power : float 

868 Power of the test, e.g. 0.8, is one minus the probability of a 

869 type II error. Power is the probability that the test correctly 

870 rejects the Null Hypothesis if the Alternative Hypothesis is true. 

871 

872 ''' 

873 return ftest_anova_power(effect_size, nobs, alpha, k_groups=k_groups) 

874 

875 #method is only added to have explicit keywords and docstring 

876 def solve_power(self, effect_size=None, nobs=None, alpha=None, power=None, 

877 k_groups=2): 

878 '''solve for any one parameter of the power of a F-test 

879 

880 for the one sample F-test the keywords are: 

881 effect_size, nobs, alpha, power 

882 

883 Exactly one needs to be ``None``, all others need numeric values. 

884 

885 

886 Parameters 

887 ---------- 

888 effect_size : float 

889 standardized effect size, mean divided by the standard deviation. 

890 effect size has to be positive. 

891 nobs : int or float 

892 sample size, number of observations. 

893 alpha : float in interval (0,1) 

894 significance level, e.g. 0.05, is the probability of a type I 

895 error, that is wrong rejections if the Null Hypothesis is true. 

896 power : float in interval (0,1) 

897 power of the test, e.g. 0.8, is one minus the probability of a 

898 type II error. Power is the probability that the test correctly 

899 rejects the Null Hypothesis if the Alternative Hypothesis is true. 

900 

901 Returns 

902 ------- 

903 value : float 

904 The value of the parameter that was set to None in the call. The 

905 value solves the power equation given the remaining parameters. 

906 

907 

908 Notes 

909 ----- 

910 The function uses scipy.optimize for finding the value that satisfies 

911 the power equation. It first uses ``brentq`` with a prior search for 

912 bounds. If this fails to find a root, ``fsolve`` is used. If ``fsolve`` 

913 also fails, then, for ``alpha``, ``power`` and ``effect_size``, 

914 ``brentq`` with fixed bounds is used. However, there can still be cases 

915 where this fails. 

916 

917 ''' 

918 # update start values for root finding 

919 if k_groups is not None: 

920 self.start_ttp['nobs'] = k_groups * 10 

921 self.start_bqexp['nobs'] = dict(low=k_groups * 2, 

922 start_upp=k_groups * 10) 

923 # first attempt at special casing 

924 if effect_size is None: 

925 return self._solve_effect_size(effect_size=effect_size, 

926 nobs=nobs, 

927 alpha=alpha, 

928 k_groups=k_groups, 

929 power=power) 

930 

931 return super(FTestAnovaPower, self).solve_power(effect_size=effect_size, 

932 nobs=nobs, 

933 alpha=alpha, 

934 k_groups=k_groups, 

935 power=power) 

936 

937 def _solve_effect_size(self, effect_size=None, nobs=None, alpha=None, 

938 power=None, k_groups=2): 

939 '''experimental, test failure in solve_power for effect_size 

940 ''' 

941 def func(x): 

942 effect_size = x 

943 return self._power_identity(effect_size=effect_size, 

944 nobs=nobs, 

945 alpha=alpha, 

946 k_groups=k_groups, 

947 power=power) 

948 

949 val, r = optimize.brentq(func, 1e-8, 1-1e-8, full_output=True) 

950 if not r.converged: 

951 print(r) 

952 return val 

953 
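# Illustrative usage sketch (added for exposition): total sample size for a
# one-way ANOVA with 3 groups, an assumed effect size of 0.3 and 80% power;
# solve_power adjusts its root-finding start values based on k_groups.
def _example_ftest_anova_solve_nobs():
    return FTestAnovaPower().solve_power(effect_size=0.3, nobs=None,
                                         alpha=0.05, power=0.8, k_groups=3)
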

954 

955class GofChisquarePower(Power): 

956 '''Statistical Power calculations for one sample chisquare test 

957 

958 ''' 

959 

960 def power(self, effect_size, nobs, alpha, n_bins, ddof=0):#alternative='two-sided'): 

961 '''Calculate the power of a chisquare test for one sample 

962 

963 Only two-sided alternative is implemented 

964 

965 Parameters 

966 ---------- 

967 effect_size : float 

968 standardized effect size, according to Cohen's definition. 

969 see :func:`statsmodels.stats.gof.chisquare_effectsize` 

970 nobs : int or float 

971 sample size, number of observations. 

972 alpha : float in interval (0,1) 

973 significance level, e.g. 0.05, is the probability of a type I 

974 error, that is wrong rejections if the Null Hypothesis is true. 

975 n_bins : int 

976 number of bins or cells in the distribution. 

977 

978 Returns 

979 ------- 

980 power : float 

981 Power of the test, e.g. 0.8, is one minus the probability of a 

982 type II error. Power is the probability that the test correctly 

983 rejects the Null Hypothesis if the Alternative Hypothesis is true. 

984 

985 ''' 

986 from statsmodels.stats.gof import chisquare_power 

987 return chisquare_power(effect_size, nobs, n_bins, alpha, ddof=ddof)

988 

989 #method is only added to have explicit keywords and docstring 

990 def solve_power(self, effect_size=None, nobs=None, alpha=None, 

991 power=None, n_bins=2): 

992 '''solve for any one parameter of the power of a one sample chisquare-test 

993 

994 for the one sample chisquare-test the keywords are: 

995 effect_size, nobs, alpha, power 

996 

997 Exactly one needs to be ``None``, all others need numeric values. 

998 

999 n_bins needs to be defined, a default=2 is used. 

1000 

1001 

1002 Parameters 

1003 ---------- 

1004 effect_size : float 

1005 standardized effect size, according to Cohen's definition. 

1006 see :func:`statsmodels.stats.gof.chisquare_effectsize` 

1007 nobs : int or float 

1008 sample size, number of observations. 

1009 alpha : float in interval (0,1) 

1010 significance level, e.g. 0.05, is the probability of a type I 

1011 error, that is wrong rejections if the Null Hypothesis is true. 

1012 power : float in interval (0,1) 

1013 power of the test, e.g. 0.8, is one minus the probability of a 

1014 type II error. Power is the probability that the test correctly 

1015 rejects the Null Hypothesis if the Alternative Hypothesis is true. 

1016 n_bins : int 

1017 number of bins or cells in the distribution 

1018 

1019 Returns 

1020 ------- 

1021 value : float 

1022 The value of the parameter that was set to None in the call. The 

1023 value solves the power equation given the remaining parameters. 

1024 

1025 

1026 Notes 

1027 ----- 

1028 The function uses scipy.optimize for finding the value that satisfies 

1029 the power equation. It first uses ``brentq`` with a prior search for 

1030 bounds. If this fails to find a root, ``fsolve`` is used. If ``fsolve`` 

1031 also fails, then, for ``alpha``, ``power`` and ``effect_size``, 

1032 ``brentq`` with fixed bounds is used. However, there can still be cases 

1033 where this fails. 

1034 

1035 ''' 

1036 return super(GofChisquarePower, self).solve_power(effect_size=effect_size, 

1037 nobs=nobs, 

1038 n_bins=n_bins, 

1039 alpha=alpha, 

1040 power=power) 

1041 
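# Illustrative usage sketch (added for exposition): sample size for a chisquare
# goodness-of-fit test with 5 bins, an assumed Cohen effect size of 0.3 and
# 80% power; n_bins always needs a value, only one keyword may be None.
def _example_chisquare_solve_nobs():
    return GofChisquarePower().solve_power(effect_size=0.3, nobs=None,
                                           alpha=0.05, power=0.8, n_bins=5)
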

1042class _GofChisquareIndPower(Power): 

1043 '''Statistical Power calculations for chisquare goodness-of-fit test 

1044 

1045 TODO: this is not working yet 

1046 for 2sample case need two nobs in function 

1047 no one-sided chisquare test, is there one? use normal distribution? 

1048 -> drop one-sided options? 

1049 ''' 

1050 

1051 

1052 def power(self, effect_size, nobs1, alpha, ratio=1, 

1053 alternative='two-sided'): 

1054 '''Calculate the power of a chisquare test for two independent samples

1055 

1056 Parameters 

1057 ---------- 

1058 effect_size : float 

1059 standardized effect size, difference between the two means divided

1060 by the standard deviation. effect size has to be positive. 

1061 nobs1 : int or float 

1062 number of observations of sample 1. The number of observations of 

1063 sample two is ratio times the size of sample 1, 

1064 i.e. ``nobs2 = nobs1 * ratio`` 

1065 alpha : float in interval (0,1) 

1066 significance level, e.g. 0.05, is the probability of a type I 

1067 error, that is wrong rejections if the Null Hypothesis is true. 

1068 ratio : float 

1069 ratio of the number of observations in sample 2 relative to 

1070 sample 1. see description of nobs1 

1071 The default for ratio is 1; to solve for ratio given the other

1072 arguments it has to be explicitly set to None.

1073 alternative : str, 'two-sided' (default) or 'one-sided' 

1074 extra argument to choose whether the power is calculated for a 

1075 two-sided (default) or one sided test. 

1076 'one-sided' assumes we are in the relevant tail. 

1077 

1078 Returns 

1079 ------- 

1080 power : float 

1081 Power of the test, e.g. 0.8, is one minus the probability of a 

1082 type II error. Power is the probability that the test correctly 

1083 rejects the Null Hypothesis if the Alternative Hypothesis is true. 

1084 

1085 ''' 

1086 

1087 from statsmodels.stats.gof import chisquare_power 

1088 nobs2 = nobs1*ratio 

1089 #equivalent to nobs = n1*n2/(n1+n2)=n1*ratio/(1+ratio) 

1090 nobs = 1./ (1. / nobs1 + 1. / nobs2) 

1091 return chisquare_power(effect_size, nobs, alpha)  # FIXME: chisquare_power also expects n_bins; class marked as not working yet

1092 

1093 #method is only added to have explicit keywords and docstring 

1094 def solve_power(self, effect_size=None, nobs1=None, alpha=None, power=None, 

1095 ratio=1., alternative='two-sided'): 

1096 '''solve for any one parameter of the power of a two sample chisquare-test

1097

1098 for the chisquare-test the keywords are:

1099 effect_size, nobs1, alpha, power, ratio 

1100 

1101 exactly one needs to be ``None``, all others need numeric values 

1102 

1103 Parameters 

1104 ---------- 

1105 effect_size : float 

1106 standardized effect size, difference between the two means divided

1107 by the standard deviation. 

1108 nobs1 : int or float 

1109 number of observations of sample 1. The number of observations of 

1110 sample two is ratio times the size of sample 1, 

1111 i.e. ``nobs2 = nobs1 * ratio`` 

1112 alpha : float in interval (0,1) 

1113 significance level, e.g. 0.05, is the probability of a type I 

1114 error, that is wrong rejections if the Null Hypothesis is true. 

1115 power : float in interval (0,1) 

1116 power of the test, e.g. 0.8, is one minus the probability of a 

1117 type II error. Power is the probability that the test correctly 

1118 rejects the Null Hypothesis if the Alternative Hypothesis is true. 

1119 ratio : float 

1120 ratio of the number of observations in sample 2 relative to 

1121 sample 1. see description of nobs1 

1122 The default for ratio is 1; to solve for ratio given the other

1123 arguments it has to be explicitly set to None.

1124 alternative : str, 'two-sided' (default) or 'one-sided' 

1125 extra argument to choose whether the power is calculated for a 

1126 two-sided (default) or one sided test. 

1127 'one-sided' assumes we are in the relevant tail. 

1128 

1129 Returns 

1130 ------- 

1131 value : float 

1132 The value of the parameter that was set to None in the call. The 

1133 value solves the power equation given the remaining parameters. 

1134 

1135 

1136 Notes 

1137 ----- 

1138 The function uses scipy.optimize for finding the value that satisfies 

1139 the power equation. It first uses ``brentq`` with a prior search for 

1140 bounds. If this fails to find a root, ``fsolve`` is used. If ``fsolve`` 

1141 also fails, then, for ``alpha``, ``power`` and ``effect_size``, 

1142 ``brentq`` with fixed bounds is used. However, there can still be cases 

1143 where this fails. 

1144 

1145 ''' 

1146 return super(_GofChisquareIndPower, self).solve_power(effect_size=effect_size, 

1147 nobs1=nobs1, 

1148 alpha=alpha, 

1149 power=power, 

1150 ratio=ratio, 

1151 alternative=alternative) 

1152 

1153#shortcut functions 

1154tt_solve_power = TTestPower().solve_power 

1155tt_ind_solve_power = TTestIndPower().solve_power 

1156zt_ind_solve_power = NormalIndPower().solve_power
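
# Illustrative usage sketch (added for exposition): the shortcut functions are
# bound methods of module-level instances and can be called directly, e.g. to
# find the effect size that gives 80% power with 100 observations per group.
def _example_shortcut_usage():
    return zt_ind_solve_power(effect_size=None, nobs1=100, alpha=0.05,
                              power=0.8, ratio=1.)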