Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2"""Tests and Confidence Intervals for Binomial Proportions 

3 

4Created on Fri Mar 01 00:23:07 2013 

5 

6Author: Josef Perktold 

7License: BSD-3 

8""" 

9from statsmodels.compat.python import lzip 

10import numpy as np 

11from scipy import stats, optimize 

12from sys import float_info 

13 

14from statsmodels.stats.base import AllPairsResults 

15from statsmodels.tools.sm_exceptions import HypothesisTestWarning 

16 

17 

def proportion_confint(count, nobs, alpha=0.05, method='normal'):
    '''confidence interval for a binomial proportion

    Parameters
    ----------
    count : int or array_like
        number of successes, can be pandas Series or DataFrame
    nobs : int
        total number of trials
    alpha : float in (0, 1)
        significance level, default 0.05
    method : {'normal', 'agresti_coull', 'beta', 'wilson', 'binom_test'}
        default: 'normal'
        method to use for confidence interval,
        currently available methods :

        - `normal` : asymptotic normal approximation
        - `agresti_coull` : Agresti-Coull interval
        - `beta` : Clopper-Pearson interval based on Beta distribution
        - `wilson` : Wilson Score interval
        - `jeffreys` : Jeffreys Bayesian Interval
        - `binom_test` : experimental, inversion of binom_test

    Returns
    -------
    ci_low, ci_upp : float, ndarray, or pandas Series or DataFrame
        lower and upper confidence level with coverage (approximately)
        1-alpha. When a pandas object is returned, then the index is taken
        from the `count`.

    Notes
    -----
    Beta, the Clopper-Pearson exact interval has coverage at least 1-alpha,
    but is in general conservative. Most of the other methods have average
    coverage equal to 1-alpha, but will have smaller coverage in some cases.

    The 'beta' and 'jeffreys' interval are central, they use alpha/2 in each
    tail, and alpha is not adjusted at the boundaries. In the extreme case
    when `count` is zero or equal to `nobs`, then the coverage will be only
    1 - alpha/2 in the case of 'beta'.

    The confidence intervals are clipped to be in the [0, 1] interval in the
    case of 'normal' and 'agresti_coull'.

    Method "binom_test" directly inverts the binomial test in scipy.stats,
    which has discrete steps; its interval bounds can raise an exception in
    small samples when a bound is close to zero or one.

    References
    ----------
    https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval

    Brown, Lawrence D.; Cai, T. Tony; DasGupta, Anirban (2001). "Interval
    Estimation for a Binomial Proportion", Statistical Science 16 (2):
    101-133. doi:10.1214/ss/1009213286.
    '''
    # remember a pandas index so results can be wrapped back at the end;
    # plain lists have an `index` *method*, which callable() rules out
    idx = getattr(count, 'index', None)
    if idx is not None and callable(idx):
        idx = None
    count = np.asarray(count)
    nobs = np.asarray(nobs)

    prop = count * 1. / nobs
    tail = 0.5 * alpha

    if method == 'normal':
        # Wald interval: prop +/- z * sqrt(prop * (1 - prop) / nobs)
        halfwidth = stats.norm.isf(alpha / 2.) * np.sqrt(prop * (1 - prop)
                                                         / nobs)
        ci_low = prop - halfwidth
        ci_upp = prop + halfwidth

    elif method == 'binom_test':
        # invert the exact binomial test: find the boundary null proportion
        # at which the two-sided p-value equals alpha
        def rootfunc(qi):
            return stats.binom_test(prop * nobs, nobs, p=qi) - alpha

        if count == 0:
            ci_low = 0
        else:
            ci_low = optimize.brentq(rootfunc, float_info.min, prop)
        if count == nobs:
            ci_upp = 1
        else:
            ci_upp = optimize.brentq(rootfunc, prop, 1. - float_info.epsilon)

    elif method == 'beta':
        # Clopper-Pearson via central beta quantiles
        ci_low = stats.beta.ppf(tail, count, nobs - count + 1)
        ci_upp = stats.beta.isf(tail, count + 1, nobs - count)

        # the beta quantiles degenerate at the boundary counts; patch them
        if np.ndim(ci_low) > 0:
            ci_low[prop == 0] = 0
            ci_upp[prop == 1] = 1
        else:
            if prop == 0:
                ci_low = 0
            if prop == 1:
                ci_upp = 1

    elif method == 'agresti_coull':
        crit = stats.norm.isf(alpha / 2.)
        # add crit**2 pseudo-observations, half successes, half failures
        n_adj = nobs + crit**2
        p_adj = (count + crit**2 / 2.) / n_adj
        halfwidth = crit * np.sqrt(p_adj * (1. - p_adj) / n_adj)
        ci_low = p_adj - halfwidth
        ci_upp = p_adj + halfwidth

    elif method == 'wilson':
        crit = stats.norm.isf(alpha / 2.)
        crit2 = crit**2
        denom = 1 + crit2 / nobs
        center = (prop + crit2 / (2 * nobs)) / denom
        halfwidth = crit * np.sqrt(prop * (1. - prop) / nobs
                                   + crit2 / (4. * nobs**2)) / denom
        ci_low = center - halfwidth
        ci_upp = center + halfwidth

    # prefix match is deliberate: forgiving of misspellings of 'jeffreys'
    elif method[:4] == 'jeff':
        ci_low, ci_upp = stats.beta.interval(1 - alpha, count + 0.5,
                                             nobs - count + 0.5)

    else:
        raise NotImplementedError('method "%s" is not available' % method)

    if method in ['normal', 'agresti_coull']:
        ci_low = np.clip(ci_low, 0, 1)
        ci_upp = np.clip(ci_upp, 0, 1)
    if idx is not None and np.ndim(ci_low) > 0:
        import pandas as pd
        if np.ndim(ci_low) == 1:
            ci_low = pd.Series(ci_low, index=idx)
            ci_upp = pd.Series(ci_upp, index=idx)
        if np.ndim(ci_low) == 2:
            ci_low = pd.DataFrame(ci_low, index=idx)
            ci_upp = pd.DataFrame(ci_upp, index=idx)

    return ci_low, ci_upp

159 

160 

def multinomial_proportions_confint(counts, alpha=0.05, method='goodman'):
    '''Confidence intervals for multinomial proportions.

    Parameters
    ----------
    counts : array_like of int, 1-D
        Number of observations in each category.
    alpha : float in (0, 1), optional
        Significance level, defaults to 0.05.
    method : {'goodman', 'sison-glaz'}, optional
        Method to use to compute the confidence intervals; available methods
        are:

        - `goodman`: based on a chi-squared approximation, valid if all
          values in `counts` are greater or equal to 5 [2]_
        - `sison-glaz`: less conservative than `goodman`, but only valid if
          `counts` has 7 or more categories (``len(counts) >= 7``) [3]_

    Returns
    -------
    confint : ndarray, 2-D
        Array of [lower, upper] confidence levels for each category, such that
        overall coverage is (approximately) `1-alpha`.

    Raises
    ------
    ValueError
        If `alpha` is not in `(0, 1)` (bounds excluded), or if the values in
        `counts` are not all positive or null.
    NotImplementedError
        If `method` is not known.
    Exception
        When ``method == 'sison-glaz'``, if for some reason `c` cannot be
        computed; this signals a bug and should be reported.

    Notes
    -----
    The `goodman` method [2]_ is based on approximating a statistic based on
    the multinomial as a chi-squared random variable. The usual recommendation
    is that this is valid if all the values in `counts` are greater than or
    equal to 5. There is no condition on the number of categories for this
    method.

    The `sison-glaz` method [3]_ approximates the multinomial probabilities,
    and evaluates that with a maximum-likelihood estimator. The first
    approximation is an Edgeworth expansion that converges when the number of
    categories goes to infinity, and the maximum-likelihood estimator converges
    when the number of observations (``sum(counts)``) goes to infinity. In
    their paper, Sison & Glaz demo their method with at least 7 categories, so
    ``len(counts) >= 7`` with all values in `counts` at or above 5 can be used
    as a rule of thumb for the validity of this method. This method is less
    conservative than the `goodman` method (i.e. it will yield confidence
    intervals closer to the desired significance level), but produces
    confidence intervals of uniform width over all categories (except when the
    intervals reach 0 or 1, in which case they are truncated), which makes it
    most useful when proportions are of similar magnitude.

    Aside from the original sources ([1]_, [2]_, and [3]_), the implementation
    uses the formulas (though not the code) presented in [4]_ and [5]_.

    References
    ----------
    .. [1] Levin, Bruce, "A representation for multinomial cumulative
           distribution functions," The Annals of Statistics, Vol. 9, No. 5,
           1981, pp. 1123-1126.

    .. [2] Goodman, L.A., "On simultaneous confidence intervals for multinomial
           proportions," Technometrics, Vol. 7, No. 2, 1965, pp. 247-254.

    .. [3] Sison, Cristina P., and Joseph Glaz, "Simultaneous Confidence
           Intervals and Sample Size Determination for Multinomial
           Proportions," Journal of the American Statistical Association,
           Vol. 90, No. 429, 1995, pp. 366-369.

    .. [4] May, Warren L., and William D. Johnson, "A SAS macro for
           constructing simultaneous confidence intervals for multinomial
           proportions," Computer methods and programs in Biomedicine, Vol. 53,
           No. 3, 1997, pp. 153-162.

    .. [5] May, Warren L., and William D. Johnson, "Constructing two-sided
           simultaneous confidence intervals for multinomial proportions for
           small counts in a large number of cells," Journal of Statistical
           Software, Vol. 5, No. 6, 2000, pp. 1-24.
    '''
    if alpha <= 0 or alpha >= 1:
        raise ValueError('alpha must be in (0, 1), bounds excluded')
    # NOTE: np.float was an alias for the builtin float and was removed from
    # numpy (>= 1.20); using the builtin keeps the same dtype (float64)
    counts = np.array(counts, dtype=float)
    if (counts < 0).any():
        raise ValueError('counts must be >= 0')

    n = counts.sum()
    k = len(counts)
    proportions = counts / n
    if method == 'goodman':
        # Goodman (1965): simultaneous intervals from a chi2(1) quantile with
        # a Bonferroni-style level adjustment 1 - alpha / k
        chi2 = stats.chi2.ppf(1 - alpha / k, 1)
        delta = chi2 ** 2 + (4 * n * proportions * chi2 * (1 - proportions))
        region = ((2 * n * proportions + chi2 +
                   np.array([- np.sqrt(delta), np.sqrt(delta)])) /
                  (2 * (chi2 + n))).T
    elif method[:5] == 'sison':  # We accept any name starting with 'sison'
        # Define a few functions we'll use a lot.
        def poisson_interval(interval, p):
            """Compute P(b <= Z <= a) where Z ~ Poisson(p) and
            `interval = (b, a)`."""
            b, a = interval
            prob = stats.poisson.cdf(a, p) - stats.poisson.cdf(b - 1, p)
            if p == 0 and np.isnan(prob):
                # hack for older scipy <=0.16.1
                return int(b - 1 < 0)
            return prob

        def truncated_poisson_factorial_moment(interval, r, p):
            """Compute mu_r, the r-th factorial moment of a poisson random
            variable of parameter `p` truncated to `interval = (b, a)`."""
            b, a = interval
            return p ** r * (1 - ((poisson_interval((a - r + 1, a), p) -
                                   poisson_interval((b - r, b - 1), p)) /
                                  poisson_interval((b, a), p)))

        def edgeworth(intervals):
            """Compute the Edgeworth expansion term of Sison & Glaz's formula
            (1) (approximated probability for multinomial proportions in a
            given box)."""
            # Compute means and central moments of the truncated poisson
            # variables.
            mu_r1, mu_r2, mu_r3, mu_r4 = [
                np.array([truncated_poisson_factorial_moment(interval, r, p)
                          for (interval, p) in zip(intervals, counts)])
                for r in range(1, 5)
            ]
            mu = mu_r1
            mu2 = mu_r2 + mu - mu ** 2
            mu3 = mu_r3 + mu_r2 * (3 - 3 * mu) + mu - 3 * mu ** 2 + 2 * mu ** 3
            mu4 = (mu_r4 + mu_r3 * (6 - 4 * mu) +
                   mu_r2 * (7 - 12 * mu + 6 * mu ** 2) +
                   mu - 4 * mu ** 2 + 6 * mu ** 3 - 3 * mu ** 4)

            # Compute expansion factors, gamma_1 and gamma_2.
            g1 = mu3.sum() / mu2.sum() ** 1.5
            g2 = (mu4.sum() - 3 * (mu2 ** 2).sum()) / mu2.sum() ** 2

            # Compute the expansion itself.
            x = (n - mu.sum()) / np.sqrt(mu2.sum())
            phi = np.exp(- x ** 2 / 2) / np.sqrt(2 * np.pi)
            H3 = x ** 3 - 3 * x
            H4 = x ** 4 - 6 * x ** 2 + 3
            H6 = x ** 6 - 15 * x ** 4 + 45 * x ** 2 - 15
            f = phi * (1 + g1 * H3 / 6 + g2 * H4 / 24 + g1 ** 2 * H6 / 72)
            return f / np.sqrt(mu2.sum())

        def approximated_multinomial_interval(intervals):
            """Compute approximated probability for Multinomial(n, proportions)
            to be in `intervals` (Sison & Glaz's formula (1))."""
            return np.exp(
                np.sum(np.log([poisson_interval(interval, p)
                               for (interval, p) in zip(intervals, counts)])) +
                np.log(edgeworth(intervals)) -
                np.log(stats.poisson._pmf(n, n))
            )

        def nu(c):
            """Compute interval coverage for a given `c` (Sison & Glaz's
            formula (7))."""
            return approximated_multinomial_interval(
                [(np.maximum(count - c, 0), np.minimum(count + c, n))
                 for count in counts])

        # Find the value of `c` that will give us the confidence intervals
        # (solving nu(c) <= 1 - alpha < nu(c + 1).
        c = 1.0
        nuc = nu(c)
        nucp1 = nu(c + 1)
        while not (nuc <= (1 - alpha) < nucp1):
            if c > n:
                raise Exception("Couldn't find a value for `c` that "
                                "solves nu(c) <= 1 - alpha < nu(c + 1)")
            c += 1
            nuc = nucp1
            nucp1 = nu(c + 1)

        # Compute gamma and the corresponding confidence intervals.
        g = (1 - alpha - nuc) / (nucp1 - nuc)
        ci_lower = np.maximum(proportions - c / n, 0)
        ci_upper = np.minimum(proportions + (c + 2 * g) / n, 1)
        region = np.array([ci_lower, ci_upper]).T
    else:
        raise NotImplementedError('method "%s" is not available' % method)
    return region

350 

351 

def samplesize_confint_proportion(proportion, half_length, alpha=0.05,
                                  method='normal'):
    '''find sample size to get desired confidence interval length

    Parameters
    ----------
    proportion : float in (0, 1)
        proportion or quantile
    half_length : float in (0, 1)
        desired half length of the confidence interval
    alpha : float in (0, 1)
        significance level, default 0.05,
        coverage of the two-sided interval is (approximately) ``1 - alpha``
    method : str in ['normal']
        method to use for confidence interval,
        currently only normal approximation

    Returns
    -------
    n : float
        sample size to get the desired half length of the confidence interval

    Notes
    -----
    this is mainly to store the formula.
    possible application: number of replications in bootstrap samples
    '''
    if method != 'normal':
        raise NotImplementedError('only "normal" is available')
    # invert the normal-approximation half length:
    # half_length = z * sqrt(p (1 - p) / n)
    crit = stats.norm.isf(alpha / 2.)
    return proportion * (1 - proportion) * (crit / half_length) ** 2

387 

def proportion_effectsize(prop1, prop2, method='normal'):
    '''
    Effect size for a test comparing two proportions

    for use in power function

    Parameters
    ----------
    prop1, prop2 : float or array_like
        The proportion value(s).

    Returns
    -------
    es : float or ndarray
        effect size for (transformed) prop1 - prop2

    Notes
    -----
    only method='normal' is implemented to match pwr.p2.test
    see http://www.statmethods.net/stats/power.html

    Effect size for `normal` is defined as ::

        2 * (arcsin(sqrt(prop1)) - arcsin(sqrt(prop2)))

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> sm.stats.proportion_effectsize(0.5, 0.4)
    0.20135792079033088
    >>> sm.stats.proportion_effectsize([0.3, 0.4, 0.5], 0.4)
    array([-0.21015893,  0.        ,  0.20135792])
    '''
    if method != 'normal':
        raise ValueError('only "normal" is implemented')

    # variance-stabilizing (arcsine square-root) transform of each proportion
    phi1 = 2 * np.arcsin(np.sqrt(prop1))
    phi2 = 2 * np.arcsin(np.sqrt(prop2))
    return phi1 - phi2

429 

def std_prop(prop, nobs):
    '''standard error for the estimate of a proportion

    This is just ``np.sqrt(prop * (1. - prop) / nobs)``

    Parameters
    ----------
    prop : array_like
        proportion
    nobs : int, array_like
        number of observations

    Returns
    -------
    std : array_like
        standard error for a proportion of nobs independent observations
    '''
    variance = prop * (1. - prop) / nobs
    return np.sqrt(variance)

448 

def _power_ztost(mean_low, var_low, mean_upp, var_upp, mean_alt, var_alt,
                 alpha=0.05, discrete=True, dist='norm', nobs=None,
                 continuity=0, critval_continuity=0):
    '''Generic statistical power function for normal based equivalence test

    This includes options to adjust the normal approximation and can use
    the binomial to evaluate the probability of the rejection region

    see power_ztost_prop for a description of the options
    '''
    # TODO: refactor structure, separate norm and binom better
    # a scalar continuity correction applies symmetrically to both tails
    if not isinstance(continuity, tuple):
        continuity = (continuity, continuity)
    crit = stats.norm.isf(alpha)
    # normal-approximation critical limits of the TOST rejection region in
    # the proportion scale: reject when k_low <= estimate <= k_upp
    k_low = mean_low + np.sqrt(var_low) * crit
    k_upp = mean_upp - np.sqrt(var_upp) * crit
    if discrete or dist == 'binom':
        # convert to integer counts, optionally shifting the critical values
        # by half a critval_continuity unit before rounding
        k_low = np.ceil(k_low * nobs + 0.5 * critval_continuity)
        k_upp = np.trunc(k_upp * nobs - 0.5 * critval_continuity)
        if dist == 'norm':
            # back to the proportion scale for the normal power evaluation
            #need proportion
            k_low = (k_low) * 1. / nobs #-1 to match PASS
            k_upp = k_upp * 1. / nobs
#    else:
#        if dist == 'binom':
#            #need counts
#            k_low *= nobs
#            k_upp *= nobs
    #print mean_low, np.sqrt(var_low), crit, var_low
    #print mean_upp, np.sqrt(var_upp), crit, var_upp
    if np.any(k_low > k_upp):   #vectorize
        # empty rejection region: the two one-sided tests cannot both reject
        import warnings
        warnings.warn("no overlap, power is zero", HypothesisTestWarning)
    std_alt = np.sqrt(var_alt)
    # standardized limits under the alternative, with an optional continuity
    # correction of 0.5 / nobs per tail
    # NOTE(review): when dist == 'binom', k_low/k_upp remain counts here, so
    # z_low/z_upp mix scales; they appear to be diagnostic only in that
    # branch -- confirm before relying on them
    z_low = (k_low - mean_alt - continuity[0] * 0.5 / nobs) / std_alt
    z_upp = (k_upp - mean_alt + continuity[1] * 0.5 / nobs) / std_alt
    if dist == 'norm':
        power = stats.norm.cdf(z_upp) - stats.norm.cdf(z_low)
    elif dist == 'binom':
        # exact power: P(k_low <= X <= k_upp), X ~ Binomial(nobs, mean_alt)
        power = (stats.binom.cdf(k_upp, nobs, mean_alt) -
                 stats.binom.cdf(k_low-1, nobs, mean_alt))
    return power, (k_low, k_upp, z_low, z_upp)

491 

492 

def binom_tost(count, nobs, low, upp):
    '''exact TOST test for one proportion using binomial distribution

    Parameters
    ----------
    count : {int, array_like}
        the number of successes in nobs trials.
    nobs : int
        the number of trials or observations.
    low, upp : floats
        lower and upper limit of equivalence region

    Returns
    -------
    pvalue : float
        p-value of equivalence test
    pval_low, pval_upp : floats
        p-values of lower and upper one-sided tests
    '''
    # two one-sided tests; non-equivalence is rejected only when both
    # one-sided tests reject, so the TOST p-value is the larger of the two
    pval_low = binom_test(count, nobs, alternative='larger', prop=low)
    pval_upp = binom_test(count, nobs, alternative='smaller', prop=upp)
    return np.maximum(pval_low, pval_upp), pval_low, pval_upp

517 

518 

def binom_tost_reject_interval(low, upp, nobs, alpha=0.05):
    '''rejection region for binomial TOST

    The interval includes the end points,
    `reject` if and only if `r_low <= x <= r_upp`.

    The interval might be empty with `r_upp < r_low`.

    Parameters
    ----------
    low, upp : floats
        lower and upper limit of equivalence region
    nobs : int
        the number of trials or observations.

    Returns
    -------
    x_low, x_upp : float
        lower and upper bound of rejection region
    '''
    # smallest count rejecting H0: p <= low at level alpha
    lower_cut = stats.binom.isf(alpha, nobs, low) + 1
    # largest count rejecting H0: p >= upp at level alpha
    upper_cut = stats.binom.ppf(alpha, nobs, upp) - 1
    return lower_cut, upper_cut

543 

def binom_test_reject_interval(value, nobs, alpha=0.05, alternative='two-sided'):
    '''rejection region for binomial test for one sample proportion

    The interval includes the end points of the rejection region.

    Parameters
    ----------
    value : float
        proportion under the Null hypothesis
    nobs : int
        the number of trials or observations.
    alpha : float in (0, 1)
        significance level
    alternative : str in ['2s', 'two-sided', 'smaller', 'larger']
        alternative hypothesis

    Returns
    -------
    x_low, x_upp : float
        lower and upper bound of rejection region
    '''
    if alternative in ['2s', 'two-sided']:
        alternative = '2s'  # normalize alternative name
        alpha = alpha / 2   # split alpha over the two tails

    # lower tail is part of the rejection region for 2s and smaller
    x_low = (stats.binom.ppf(alpha, nobs, value) - 1
             if alternative in ['2s', 'smaller'] else 0)
    # upper tail is part of the rejection region for 2s and larger
    x_upp = (stats.binom.isf(alpha, nobs, value) + 1
             if alternative in ['2s', 'larger'] else nobs)

    return x_low, x_upp

578 

def binom_test(count, nobs, prop=0.5, alternative='two-sided'):
    '''Perform a test that the probability of success is p.

    This is an exact, two-sided test of the null hypothesis
    that the probability of success in a Bernoulli experiment
    is `p`.

    Parameters
    ----------
    count : {int, array_like}
        the number of successes in nobs trials.
    nobs : int
        the number of trials or observations.
    prop : float, optional
        The probability of success under the null hypothesis,
        `0 <= prop <= 1`. The default value is `prop = 0.5`
    alternative : str in ['two-sided', 'smaller', 'larger']
        alternative hypothesis, which can be two-sided or either one of the
        one-sided tests.

    Returns
    -------
    p-value : float
        The p-value of the hypothesis test

    Raises
    ------
    ValueError
        If `prop` is outside [0, 1] or `alternative` is not recognized.

    Notes
    -----
    This uses scipy.stats for the two-sided alternative.
    ``scipy.stats.binom_test`` was removed in scipy 1.12; this falls back to
    ``scipy.stats.binomtest`` when the old function is not available.
    '''

    if np.any(prop > 1.0) or np.any(prop < 0.0):
        raise ValueError("p must be in range [0,1]")
    if alternative in ['2s', 'two-sided']:
        try:
            pval = stats.binom_test(count, n=nobs, p=prop)
        except AttributeError:
            # scipy >= 1.12: binom_test was removed in favor of binomtest,
            # which computes the same exact two-sided p-value
            pval = stats.binomtest(int(count), n=int(nobs), p=prop).pvalue
    elif alternative in ['l', 'larger']:
        # one-sided upper tail: P(X >= count)
        pval = stats.binom.sf(count-1, nobs, prop)
    elif alternative in ['s', 'smaller']:
        # one-sided lower tail: P(X <= count)
        pval = stats.binom.cdf(count, nobs, prop)
    else:
        raise ValueError('alternative not recognized\n'
                         'should be two-sided, larger or smaller')
    return pval

622 

623 

def power_binom_tost(low, upp, nobs, p_alt=None, alpha=0.05):
    '''Power of the exact binomial TOST at an alternative proportion.

    Parameters
    ----------
    low, upp : floats
        lower and upper limit of the equivalence region
    nobs : int
        number of trials or observations
    p_alt : float or None
        proportion under the alternative; if None, the midpoint of the
        equivalence region is used
    alpha : float in (0, 1)
        significance level of the test

    Returns
    -------
    power : float
        probability that the TOST rejects when the true proportion is p_alt
    '''
    if p_alt is None:
        # default alternative: center of the equivalence region
        p_alt = 0.5 * (low + upp)
    reject_low, reject_upp = binom_tost_reject_interval(low, upp, nobs,
                                                        alpha=alpha)
    # P(reject_low <= X <= reject_upp) with X ~ Binomial(nobs, p_alt)
    return (stats.binom.cdf(reject_upp, nobs, p_alt) -
            stats.binom.cdf(reject_low - 1, nobs, p_alt))

631 

def power_ztost_prop(low, upp, nobs, p_alt, alpha=0.05, dist='norm',
                     variance_prop=None, discrete=True, continuity=0,
                     critval_continuity=0):
    '''Power of proportions equivalence test based on normal distribution

    Parameters
    ----------
    low, upp : floats
        lower and upper limit of equivalence region
    nobs : int
        number of observations
    p_alt : float in (0,1)
        proportion under the alternative
    alpha : float in (0,1)
        significance level of the test
    dist : str in ['norm', 'binom']
        This defines the distribution to evaluate the power of the test. The
        critical values of the TOST test are always based on the normal
        approximation, but the distribution for the power can be either the
        normal (default) or the binomial (exact) distribution.
    variance_prop : None or float in (0,1)
        If this is None, then the variances for the two one sided tests are
        based on the proportions equal to the equivalence limits.
        If variance_prop is given, then it is used to calculate the variance
        for the TOST statistics. If this is based on an sample, then the
        estimated proportion can be used.
    discrete : bool
        If true, then the critical values of the rejection region are
        converted to integers. If dist is "binom", this is automatically
        assumed. If discrete is false, then the TOST critical values are used
        as floating point numbers, and the power is calculated based on the
        rejection region that is not discretized.
    continuity : bool or float
        adjust the rejection region for the normal power probability. This
        has an effect only if ``dist='norm'``
    critval_continuity : bool or float
        If this is non-zero, then the critical values of the tost rejection
        region are adjusted before converting to integers. This affects both
        distributions, ``dist='norm'`` and ``dist='binom'``.

    Returns
    -------
    power : float
        statistical power of the equivalence test.
    (k_low, k_upp, z_low, z_upp) : tuple of floats
        critical limits in intermediate steps
        temporary return, will be changed

    Notes
    -----
    In small samples the power for the ``discrete`` version, has a sawtooth
    pattern as a function of the number of observations. As a consequence,
    small changes in the number of observations or in the normal
    approximation can have a large effect on the power.

    ``continuity`` and ``critval_continuity`` are added to match some results
    of PASS, and are mainly to investigate the sensitivity of the ztost power
    to small changes in the rejection region. From my interpretation of the
    equations in the SAS manual, both are zero in SAS.

    works vectorized

    **verification:**

    The ``dist='binom'`` results match PASS,
    The ``dist='norm'`` results look reasonable, but no benchmark is
    available.

    References
    ----------
    SAS Manual: Chapter 68: The Power Procedure, Computational Resources
    PASS Chapter 110: Equivalence Tests for One Proportion.
    '''
    # means of the two one-sided test statistics and of the alternative
    mean_low, mean_upp, mean_alt = low, upp, p_alt
    # binomial variances at the corresponding proportions
    var_low, var_upp, var_alt = [std_prop(p, nobs) ** 2
                                 for p in (low, upp, p_alt)]
    if variance_prop is not None:
        # common TOST variance based on a user-specified proportion
        var_low = var_upp = std_prop(variance_prop, nobs) ** 2
    power = _power_ztost(mean_low, var_low, mean_upp, var_upp, mean_alt,
                         var_alt, alpha=alpha, discrete=discrete, dist=dist,
                         nobs=nobs, continuity=continuity,
                         critval_continuity=critval_continuity)
    # the normal approximation can produce negative power; truncate at zero
    return np.maximum(power[0], 0), power[1:]

717 

718 

719def _table_proportion(count, nobs): 

720 '''create a k by 2 contingency table for proportion 

721 

722 helper function for proportions_chisquare 

723 

724 Parameters 

725 ---------- 

726 count : {int, array_like} 

727 the number of successes in nobs trials. 

728 nobs : int 

729 the number of trials or observations. 

730 

731 Returns 

732 ------- 

733 table : ndarray 

734 (k, 2) contingency table 

735 

736 Notes 

737 ----- 

738 recent scipy has more elaborate contingency table functions 

739 

740 ''' 

741 table = np.column_stack((count, nobs - count)) 

742 expected = table.sum(0) * table.sum(1)[:,None] * 1. / table.sum() 

743 n_rows = table.shape[0] 

744 return table, expected, n_rows 

745 

746 

def proportions_ztest(count, nobs, value=None, alternative='two-sided',
                      prop_var=False):
    """
    Test for proportions based on normal (z) test

    Parameters
    ----------
    count : {int, array_like}
        the number of successes in nobs trials. If this is array_like, then
        the assumption is that this represents the number of successes for
        each independent sample
    nobs : {int, array_like}
        the number of trials or observations, with the same length as
        count.
    value : float, array_like or None, optional
        This is the value of the null hypothesis equal to the proportion in
        the case of a one sample test. In the case of a two-sample test, the
        null hypothesis is that prop[0] - prop[1] = value, where prop is the
        proportion in the two samples. If not provided value = 0 and the null
        is prop[0] = prop[1]
    alternative : str in ['two-sided', 'smaller', 'larger']
        The alternative hypothesis can be either two-sided or one of the one-
        sided tests, smaller means that the alternative hypothesis is
        ``prop < value`` and larger means ``prop > value``. In the two sample
        test, smaller means that the alternative hypothesis is ``p1 < p2``
        and larger means ``p1 > p2`` where ``p1`` is the proportion of the
        first sample and ``p2`` of the second one.
    prop_var : False or float in (0, 1)
        If prop_var is false, then the variance of the proportion estimate is
        calculated based on the sample proportion. Alternatively, a
        proportion can be specified to calculate this variance. Common use
        case is to use the proportion under the Null hypothesis to specify
        the variance of the proportion estimate.

    Returns
    -------
    zstat : float
        test statistic for the z-test
    p-value : float
        p-value for the z-test

    Examples
    --------
    >>> count = 5
    >>> nobs = 83
    >>> value = .05
    >>> stat, pval = proportions_ztest(count, nobs, value)
    >>> print('{0:0.3f}'.format(pval))
    0.695

    >>> import numpy as np
    >>> from statsmodels.stats.proportion import proportions_ztest
    >>> count = np.array([5, 12])
    >>> nobs = np.array([83, 99])
    >>> stat, pval = proportions_ztest(count, nobs)
    >>> print('{0:0.3f}'.format(pval))
    0.159

    Notes
    -----
    This uses a simple normal test for proportions. It should be the same as
    running the mean z-test on the data encoded 1 for event and 0 for no
    event so that the sum corresponds to the count.

    In the one and two sample cases with two-sided alternative, this test
    produces the same p-value as ``proportions_chisquare``, since the
    chisquare is the distribution of the square of a standard normal
    distribution.
    """
    # TODO: verify that this really holds
    # TODO: add continuity correction or other improvements for small samples
    # TODO: change options similar to propotion_ztost ?

    count = np.asarray(count)
    nobs = np.asarray(nobs)

    # broadcast a scalar nobs to the shape of count
    if nobs.size == 1:
        nobs = nobs * np.ones_like(count)

    prop = count * 1. / nobs
    k_sample = np.size(prop)
    if value is None:
        if k_sample == 1:
            raise ValueError('value must be provided for a 1-sample test')
        value = 0

    if k_sample == 1:
        diff = prop - value
    elif k_sample == 2:
        diff = prop[0] - prop[1] - value
    else:
        msg = 'more than two samples are not implemented yet'
        raise NotImplementedError(msg)

    # variance from the pooled proportion, unless a fixed proportion is given
    p_pooled = prop_var if prop_var else np.sum(count) * 1. / np.sum(nobs)
    var_total = p_pooled * (1 - p_pooled) * np.sum(1. / nobs)
    std_diff = np.sqrt(var_total)
    from statsmodels.stats.weightstats import _zstat_generic2
    return _zstat_generic2(diff, std_diff, alternative)

849 

def proportions_ztost(count, nobs, low, upp, prop_var='sample'):
    '''Equivalence test based on normal distribution

    Parameters
    ----------
    count : {int, array_like}
        the number of successes in nobs trials. If this is array_like, then
        the assumption is that this represents the number of successes for
        each independent sample
    nobs : int
        the number of trials or observations, with the same length as
        count.
    low, upp : float
        equivalence interval low < prop1 - prop2 < upp
    prop_var : str or float in (0, 1)
        prop_var determines which proportion is used for the calculation
        of the standard deviation of the proportion estimate
        The available options for string are 'sample' (default), 'null' and
        'limits'. If prop_var is a float, then it is used directly.

    Returns
    -------
    pvalue : float
        pvalue of the non-equivalence test
    t1, pv1 : tuple of floats
        test statistic and pvalue for lower threshold test
    t2, pv2 : tuple of floats
        test statistic and pvalue for upper threshold test

    Raises
    ------
    ValueError
        If `prop_var` is a string other than 'sample', 'null' or 'limits'.

    Notes
    -----
    checked only for 1 sample case
    '''
    # an unrecognized string option previously fell through the if-chain and
    # produced a confusing downstream error; fail fast instead
    if isinstance(prop_var, str) and prop_var not in ('sample', 'null',
                                                      'limits'):
        raise ValueError("prop_var must be 'sample', 'null', 'limits' "
                         "or a proportion in (0, 1)")

    if prop_var == 'limits':
        prop_var_low = low
        prop_var_upp = upp
    elif prop_var == 'sample':
        prop_var_low = prop_var_upp = False  # ztest uses sample
    elif prop_var == 'null':
        prop_var_low = prop_var_upp = 0.5 * (low + upp)
    elif np.isreal(prop_var):
        prop_var_low = prop_var_upp = prop_var

    tt1 = proportions_ztest(count, nobs, alternative='larger',
                            prop_var=prop_var_low, value=low)
    tt2 = proportions_ztest(count, nobs, alternative='smaller',
                            prop_var=prop_var_upp, value=upp)
    # TOST: reject non-equivalence only if both one-sided tests reject
    return np.maximum(tt1[1], tt2[1]), tt1, tt2,

899 

def proportions_chisquare(count, nobs, value=None):
    '''test for proportions based on chisquare test

    Parameters
    ----------
    count : {int, array_like}
        the number of successes in nobs trials. If this is array_like, then
        the assumption is that this represents the number of successes for
        each independent sample
    nobs : int
        the number of trials or observations, with the same length as
        count.
    value : None or float or array_like
        hypothesized proportion; if given, all samples are tested jointly
        against this value

    Returns
    -------
    chi2stat : float
        test statistic for the chisquare test
    p-value : float
        p-value for the chisquare test
    (table, expected)
        table is a (k, 2) contingency table, ``expected`` is the
        corresponding table of counts that are expected under independence
        with given margins

    Notes
    -----
    Recent version of scipy.stats have a chisquare test for independence in
    contingency tables.

    This function provides a similar interface to chisquare tests as
    ``prop.test`` in R, however without the option for Yates continuity
    correction.

    count can be the count for the number of events for a single proportion,
    or the counts for several independent proportions. If value is given,
    then all proportions are jointly tested against this value. If value is
    not given and count and nobs are not scalar, then the null hypothesis is
    that all samples have the same proportion.

    '''
    nobs = np.atleast_1d(nobs)
    table, expected, n_rows = _table_proportion(count, nobs)

    if value is None:
        # null: all samples share a common (estimated) proportion
        ddof = n_rows
    else:
        # null: every proportion equals ``value`` -> expected counts fixed
        expected = np.column_stack((nobs * value, nobs * (1 - value)))
        ddof = n_rows - 1

    chi2stat, pval = stats.chisquare(table.ravel(), expected.ravel(),
                                     ddof=ddof)
    return chi2stat, pval, (table, expected)

954 

955 

956 

957 

def proportions_chisquare_allpairs(count, nobs, multitest_method='hs'):
    '''chisquare test of proportions for all pairs of k samples

    Performs a chisquare test for proportions for all pairwise comparisons.
    The alternative is two-sided

    Parameters
    ----------
    count : array_like
        the number of successes in nobs trials for each independent sample.
    nobs : array_like
        the number of trials or observations for each sample, with the same
        length as count.
    multitest_method : str
        This chooses the method for the multiple testing p-value correction,
        that is used as default in the results.
        It can be any method that is available in ``multipletesting``.
        The default is Holm-Sidak 'hs'.

    Returns
    -------
    result : AllPairsResults instance
        The returned results instance has several statistics, such as p-values,
        attached, and additional methods for using a non-default
        ``multitest_method``.

    Notes
    -----
    Yates continuity correction is not available.
    '''
    # accept plain sequences: pairs are selected below by fancy indexing,
    # which requires ndarray (a Python list would raise TypeError)
    count = np.asarray(count)
    nobs = np.asarray(nobs)
    # all (i, j) index pairs with i < j
    all_pairs = lzip(*np.triu_indices(len(count), 1))
    pvals = [proportions_chisquare(count[list(pair)], nobs[list(pair)])[1]
             for pair in all_pairs]
    return AllPairsResults(pvals, all_pairs, multitest_method=multitest_method)

995 

def proportions_chisquare_pairscontrol(count, nobs, value=None,
                               multitest_method='hs', alternative='two-sided'):
    '''chisquare test of proportions for pairs of k samples compared to control

    Performs a chisquare test for proportions for pairwise comparisons with a
    control (Dunnet's test). The control is assumed to be the first element
    of ``count`` and ``nobs``. The alternative is two-sided, larger or
    smaller.

    Parameters
    ----------
    count : array_like
        the number of successes in nobs trials for each independent sample.
        The first element is the control.
    nobs : array_like
        the number of trials or observations for each sample, with the same
        length as count.
    value : None or float
        not yet implemented; must be None
    multitest_method : str
        This chooses the method for the multiple testing p-value correction,
        that is used as default in the results.
        It can be any method that is available in ``multipletesting``.
        The default is Holm-Sidak 'hs'.
    alternative : str in ['two-sided', 'smaller', 'larger']
        alternative hypothesis, which can be two-sided or either one of the
        one-sided tests.

    Returns
    -------
    result : AllPairsResults instance
        The returned results instance has several statistics, such as p-values,
        attached, and additional methods for using a non-default
        ``multitest_method``.

    Raises
    ------
    NotImplementedError
        if ``value`` is given or ``alternative`` is not two-sided.

    Notes
    -----
    Yates continuity correction is not available.

    ``value`` and ``alternative`` options are not yet implemented.

    '''
    if (value is not None) or (alternative not in ['two-sided', '2s']):
        raise NotImplementedError
    # accept plain sequences: pairs are selected below by fancy indexing,
    # which requires ndarray (a Python list would raise TypeError)
    count = np.asarray(count)
    nobs = np.asarray(nobs)
    # compare control (index 0) against every other sample
    all_pairs = [(0, k) for k in range(1, len(count))]
    pvals = [proportions_chisquare(count[list(pair)], nobs[list(pair)],
                                   #alternative=alternative)[1]
                                   )[1]
             for pair in all_pairs]
    return AllPairsResults(pvals, all_pairs, multitest_method=multitest_method)