Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/stats/power.py : 23%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2#pylint: disable-msg=W0142
3"""Statistical power, solving for nobs, ... - trial version
5Created on Sat Jan 12 21:48:06 2013
7Author: Josef Perktold
9Example
10roundtrip - root with respect to all variables
12 calculated, desired
13nobs 33.367204205 33.367204205
14effect 0.5 0.5
15alpha 0.05 0.05
16power 0.8 0.8
19TODO:
20refactoring
21 - rename beta -> power, beta (type 2 error is beta = 1-power) DONE
22 - I think the current implementation can handle any kinds of extra keywords
23 (except for maybe raising meaningful exceptions
24 - streamline code, I think internally classes can be merged
25 how to extend to k-sample tests?
26 user interface for different tests that map to the same (internal) test class
27 - sequence of arguments might be inconsistent,
28 arg and/or kwds so python checks what's required and what can be None.
29 - templating for docstrings ?
32"""
33from statsmodels.compat.python import iteritems
34import numpy as np
35from scipy import stats, optimize
36from statsmodels.tools.rootfinding import brentq_expanding
def ttest_power(effect_size, nobs, alpha, df=None, alternative='two-sided'):
    '''Calculate the power of a t-test.

    Parameters
    ----------
    effect_size : float
        standardized effect size, mean divided by the standard deviation.
    nobs : int or float
        sample size, number of observations.
    alpha : float in interval (0,1)
        significance level, e.g. 0.05, is the probability of a type I
        error, that is wrong rejections if the Null Hypothesis is true.
    df : int or float, optional
        degrees of freedom. By default this is None and ``df = nobs - 1``
        (one sample or paired t-test) is used.
    alternative : str, 'two-sided' (default), 'larger', 'smaller'
        whether the power is calculated for a two-sided (default) or one
        sided test. The one-sided test can be either 'larger' or 'smaller'.

    Returns
    -------
    power : float
        Power of the test, e.g. 0.8, is one minus the probability of a
        type II error. Power is the probability that the test correctly
        rejects the Null Hypothesis if the Alternative Hypothesis is true.
    '''
    d = effect_size
    if df is None:
        df = nobs - 1

    if alternative in ['two-sided', '2s']:
        alpha_ = alpha / 2.  #no inplace changes, does not work
    elif alternative in ['smaller', 'larger']:
        alpha_ = alpha
    else:
        raise ValueError("alternative has to be 'two-sided', 'larger' " +
                         "or 'smaller'")

    pow_ = 0
    if alternative in ['two-sided', '2s', 'larger']:
        crit_upp = stats.t.isf(alpha_, df)
        # use private methods, generic methods return nan with negative d
        if np.any(np.isnan(crit_upp)):
            # avoid endless loop, https://github.com/scipy/scipy/issues/2667
            pow_ = np.nan
        else:
            pow_ = stats.nct._sf(crit_upp, df, d*np.sqrt(nobs))
    if alternative in ['two-sided', '2s', 'smaller']:
        crit_low = stats.t.ppf(alpha_, df)
        if np.any(np.isnan(crit_low)):
            pow_ = np.nan
        else:
            # two-sided power is the sum of both rejection-tail probabilities
            pow_ += stats.nct._cdf(crit_low, df, d*np.sqrt(nobs))
    return pow_
def normal_power(effect_size, nobs, alpha, alternative='two-sided', sigma=1.):
    '''Calculate the power of a normally distributed test statistic.

    Parameters
    ----------
    effect_size : float
        standardized effect size, mean divided by the standard deviation.
    nobs : int or float
        sample size, number of observations.
    alpha : float in interval (0,1)
        significance level, e.g. 0.05, is the probability of a type I
        error, that is wrong rejections if the Null Hypothesis is true.
    alternative : str, 'two-sided' (default), 'larger', 'smaller'
        whether the power is calculated for a two-sided (default) or one
        sided test. The one-sided test can be either 'larger' or 'smaller'.
    sigma : float
        standard deviation of the test statistic; the noncentrality is
        ``effect_size * sqrt(nobs) / sigma``.

    Returns
    -------
    power : float
        Power of the test, e.g. 0.8, is one minus the probability of a
        type II error. Power is the probability that the test correctly
        rejects the Null Hypothesis if the Alternative Hypothesis is true.
    '''
    d = effect_size

    if alternative in ['two-sided', '2s']:
        alpha_ = alpha / 2.  #no inplace changes, does not work
    elif alternative in ['smaller', 'larger']:
        alpha_ = alpha
    else:
        raise ValueError("alternative has to be 'two-sided', 'larger' " +
                         "or 'smaller'")

    pow_ = 0
    if alternative in ['two-sided', '2s', 'larger']:
        crit = stats.norm.isf(alpha_)
        pow_ = stats.norm.sf(crit - d*np.sqrt(nobs)/sigma)
    if alternative in ['two-sided', '2s', 'smaller']:
        crit = stats.norm.ppf(alpha_)
        # two-sided power is the sum of both rejection-tail probabilities
        pow_ += stats.norm.cdf(crit - d*np.sqrt(nobs)/sigma)
    return pow_
95def ftest_anova_power(effect_size, nobs, alpha, k_groups=2, df=None):
96 '''power for ftest for one way anova with k equal sized groups
98 nobs total sample size, sum over all groups
100 should be general nobs observations, k_groups restrictions ???
101 '''
102 df_num = nobs - k_groups
103 df_denom = k_groups - 1
104 crit = stats.f.isf(alpha, df_denom, df_num)
105 pow_ = stats.ncf.sf(crit, df_denom, df_num, effect_size**2 * nobs)
106 return pow_#, crit
108def ftest_power(effect_size, df_num, df_denom, alpha, ncc=1):
109 '''Calculate the power of a F-test.
111 Parameters
112 ----------
113 effect_size : float
114 standardized effect size, mean divided by the standard deviation.
115 effect size has to be positive.
116 df_num : int or float
117 numerator degrees of freedom.
118 df_denom : int or float
119 denominator degrees of freedom.
120 alpha : float in interval (0,1)
121 significance level, e.g. 0.05, is the probability of a type I
122 error, that is wrong rejections if the Null Hypothesis is true.
123 ncc : int
124 degrees of freedom correction for non-centrality parameter.
125 see Notes
127 Returns
128 -------
129 power : float
130 Power of the test, e.g. 0.8, is one minus the probability of a
131 type II error. Power is the probability that the test correctly
132 rejects the Null Hypothesis if the Alternative Hypothesis is true.
134 Notes
135 -----
137 sample size is given implicitly by df_num
139 set ncc=0 to match t-test, or f-test in LikelihoodModelResults.
140 ncc=1 matches the non-centrality parameter in R::pwr::pwr.f2.test
142 ftest_power with ncc=0 should also be correct for f_test in regression
143 models, with df_num and d_denom as defined there. (not verified yet)
144 '''
145 nc = effect_size**2 * (df_denom + df_num + ncc)
146 crit = stats.f.isf(alpha, df_denom, df_num)
147 pow_ = stats.ncf.sf(crit, df_denom, df_num, nc)
148 return pow_ #, crit, nc
151#class based implementation
152#--------------------------
class Power(object):
    '''Statistical Power calculations, Base Class

    Subclasses implement ``power`` for a specific test; this base class
    provides the generic root-finding machinery in ``solve_power`` and the
    plotting helper ``plot_power``.

    so far this could all be class methods
    '''

    def __init__(self, **kwds):
        # arbitrary keywords are attached as instance attributes
        self.__dict__.update(kwds)
        # used only for instance level start values for the root finders
        self.start_ttp = dict(effect_size=0.01, nobs=10., alpha=0.15,
                              power=0.6, nobs1=10., ratio=1,
                              df_num=10, df_denom=3  # for FTestPower
                              )
        # TODO: nobs1 and ratio are for ttest_ind,
        # need start_ttp for each test/class separately,
        # possible rootfinding problem for effect_size, starting small seems to
        # work
        from collections import defaultdict
        # per-parameter keyword arguments for brentq_expanding
        self.start_bqexp = defaultdict(dict)
        for key in ['nobs', 'nobs1', 'df_num', 'df_denom']:
            self.start_bqexp[key] = dict(low=2., start_upp=50.)
        for key in ['df_denom']:
            # overrides the entry set by the previous loop (lower bound 1)
            self.start_bqexp[key] = dict(low=1., start_upp=50.)
        for key in ['ratio']:
            self.start_bqexp[key] = dict(low=1e-8, start_upp=2)
        for key in ['alpha']:
            # alpha is a probability, keep strictly inside (0, 1)
            self.start_bqexp[key] = dict(low=1e-12, upp=1 - 1e-12)

    def power(self, *args, **kwds):
        # implemented by each subclass for its specific test
        raise NotImplementedError

    def _power_identity(self, *args, **kwds):
        # residual function: power at the candidate parameters minus the
        # desired power; solve_power searches for the root of this
        power_ = kwds.pop('power')
        return self.power(*args, **kwds) - power_

    def solve_power(self, **kwds):
        '''solve for any one of the parameters of a t-test

        for t-test the keywords are:
            effect_size, nobs, alpha, power

        exactly one needs to be ``None``, all others need numeric values

        *attaches*

        cache_fit_res : list
            Cache of the result of the root finding procedure for the latest
            call to ``solve_power``, mainly for debugging purposes.
            The first element is the success indicator, one if successful.
            The remaining elements contain the return information of the up to
            three solvers that have been tried.
        '''
        #TODO: maybe use explicit kwds,
        # nicer but requires inspect? and not generic across tests
        # I'm duplicating this in the subclass to get informative docstring
        key = [k for k, v in iteritems(kwds) if v is None]
        if len(key) != 1:
            raise ValueError('need exactly one keyword that is None')
        key = key[0]

        if key == 'power':
            # solving for power is just a forward evaluation, no root finding
            del kwds['power']
            return self.power(**kwds)

        if kwds['effect_size'] == 0:
            import warnings
            from statsmodels.tools.sm_exceptions import HypothesisTestWarning
            warnings.warn('Warning: Effect size of 0 detected', HypothesisTestWarning)
            if key == 'power':
                # NOTE(review): unreachable -- key == 'power' already
                # returned above; kept to preserve original behavior
                return kwds['alpha']
            if key == 'alpha':
                return kwds['power']
            else:
                raise ValueError('Cannot detect an effect-size of 0. Try changing your effect-size.')

        self._counter = 0

        def func(x):
            kwds[key] = x
            fval = self._power_identity(**kwds)
            self._counter += 1
            if self._counter > 500:
                raise RuntimeError('possible endless loop (500 NaNs)')
            if np.isnan(fval):
                # return inf instead of nan so the root finders keep going
                return np.inf
            else:
                return fval

        #TODO: I'm using the following so I get a warning when start_ttp is not defined
        try:
            start_value = self.start_ttp[key]
        except KeyError:
            start_value = 0.9
            import warnings
            from statsmodels.tools.sm_exceptions import ValueWarning
            warnings.warn('Warning: using default start_value for {0}'.format(key), ValueWarning)

        fit_kwds = self.start_bqexp[key]
        fit_res = []
        # solver 1: brentq with expanding bracket search
        try:
            val, res = brentq_expanding(func, full_output=True, **fit_kwds)
            failed = False
            fit_res.append(res)
        except ValueError:
            failed = True
            fit_res.append(None)

        success = None
        if (not failed) and res.converged:
            success = 1
        else:
            # solver 2 (backup): fsolve from a heuristic start value
            # TODO: check more cases to make this robust
            if not np.isnan(start_value):
                val, infodict, ier, msg = optimize.fsolve(func, start_value,
                                                          full_output=True)  #scalar
                fval = infodict['fvec']
                fit_res.append(infodict)
            else:
                ier = -1
                fval = 1
                fit_res.append([None])

            if ier == 1 and np.abs(fval) < 1e-4:
                success = 1
            else:
                # solver 3 (last resort): plain brentq on (0, 1), only valid
                # for parameters bounded to the unit interval
                if key in ['alpha', 'power', 'effect_size']:
                    val, r = optimize.brentq(func, 1e-8, 1 - 1e-8,
                                             full_output=True)  #scalar
                    success = 1 if r.converged else 0
                    fit_res.append(r)
                else:
                    success = 0

        if not success == 1:
            import warnings
            from statsmodels.tools.sm_exceptions import (ConvergenceWarning,
                                                         convergence_doc)
            warnings.warn(convergence_doc, ConvergenceWarning)

        # attach fit_res, for reading only, should be needed only for debugging
        fit_res.insert(0, success)
        self.cache_fit_res = fit_res
        return val

    def plot_power(self, dep_var='nobs', nobs=None, effect_size=None,
                   alpha=0.05, ax=None, title=None, plt_kwds=None, **kwds):
        """
        Plot power with number of observations or effect size on x-axis

        Parameters
        ----------
        dep_var : {'nobs', 'effect_size', 'alpha'}
            This specifies which variable is used for the horizontal axis.
            If dep_var='nobs' (default), then one curve is created for each
            value of ``effect_size``. If dep_var='effect_size' or alpha, then
            one curve is created for each value of ``nobs``.
        nobs : {scalar, array_like}
            specifies the values of the number of observations in the plot
        effect_size : {scalar, array_like}
            specifies the values of the effect_size in the plot
        alpha : {float, array_like}
            The significance level (type I error) used in the power
            calculation. Can only be more than a scalar, if ``dep_var='alpha'``
        ax : None or axis instance
            If ax is None, than a matplotlib figure is created. If ax is a
            matplotlib axis instance, then it is reused, and the plot elements
            are created with it.
        title : str
            title for the axis. Use an empty string, ``''``, to avoid a title.
        plt_kwds : {None, dict}
            not used yet
        kwds : dict
            These remaining keyword arguments are used as arguments to the
            power function. Many power functions support ``alternative`` as a
            keyword argument, two-sample tests support ``ratio``.

        Returns
        -------
        Figure
            If `ax` is None, the created figure. Otherwise the figure to which
            `ax` is connected.

        Notes
        -----
        This works only for classes where the ``power`` method has
        ``effect_size``, ``nobs`` and ``alpha`` as the first three arguments.
        If the second argument is ``nobs1``, then the number of observations
        in the plot are those for the first sample.
        TODO: fix this for FTestPower and GofChisquarePower

        TODO: maybe add line variable, if we want more than nobs and effectsize
        """
        from statsmodels.graphics import utils
        from statsmodels.graphics.plottools import rainbow
        fig, ax = utils.create_mpl_ax(ax)
        import matplotlib.pyplot as plt
        colormap = plt.cm.Dark2  #pylint: disable-msg=E1101
        plt_alpha = 1  #0.75
        lw = 2
        if dep_var == 'nobs':
            colors = rainbow(len(effect_size))
            # colormap overrides the rainbow colors (kept as in original)
            colors = [colormap(i) for i in np.linspace(0, 0.9, len(effect_size))]
            for ii, es in enumerate(effect_size):
                power = self.power(es, nobs, alpha, **kwds)
                ax.plot(nobs, power, lw=lw, alpha=plt_alpha,
                        color=colors[ii], label='es=%4.2F' % es)
            xlabel = 'Number of Observations'
        elif dep_var in ['effect size', 'effect_size', 'es']:
            colors = rainbow(len(nobs))
            colors = [colormap(i) for i in np.linspace(0, 0.9, len(nobs))]
            for ii, n in enumerate(nobs):
                power = self.power(effect_size, n, alpha, **kwds)
                ax.plot(effect_size, power, lw=lw, alpha=plt_alpha,
                        color=colors[ii], label='N=%4.2F' % n)
            xlabel = 'Effect Size'
        elif dep_var in ['alpha']:
            # experimental nobs as defining separate lines
            colors = rainbow(len(nobs))

            for ii, n in enumerate(nobs):
                power = self.power(effect_size, n, alpha, **kwds)
                ax.plot(alpha, power, lw=lw, alpha=plt_alpha,
                        color=colors[ii], label='N=%4.2F' % n)
            xlabel = 'alpha'
        else:
            raise ValueError('depvar not implemented')

        if title is None:
            title = 'Power of Test'
        ax.set_xlabel(xlabel)
        ax.set_title(title)
        ax.legend(loc='lower right')
        return fig
class TTestPower(Power):
    '''Statistical Power calculations for one sample or paired sample t-test
    '''

    def power(self, effect_size, nobs, alpha, df=None, alternative='two-sided'):
        '''Calculate the power of a t-test for one sample or paired samples.

        Parameters
        ----------
        effect_size : float
            standardized effect size, mean divided by the standard deviation.
            effect size has to be positive.
        nobs : int or float
            sample size, number of observations.
        alpha : float in interval (0,1)
            significance level, e.g. 0.05, is the probability of a type I
            error, that is wrong rejections if the Null Hypothesis is true.
        df : int or float
            degrees of freedom. By default this is None, and the df from the
            one sample or paired ttest is used, ``df = nobs1 - 1``
        alternative : str, 'two-sided' (default), 'larger', 'smaller'
            extra argument to choose whether the power is calculated for a
            two-sided (default) or one sided test. The one-sided test can be
            either 'larger', 'smaller'.

        Returns
        -------
        power : float
            Power of the test, e.g. 0.8, is one minus the probability of a
            type II error. Power is the probability that the test correctly
            rejects the Null Hypothesis if the Alternative Hypothesis is true.
        '''
        # delegate to the module-level function
        return ttest_power(effect_size, nobs, alpha, df=df,
                           alternative=alternative)

    # method is only added to have explicit keywords and docstring
    def solve_power(self, effect_size=None, nobs=None, alpha=None, power=None,
                    alternative='two-sided'):
        '''solve for any one parameter of the power of a one sample t-test

        for the one sample t-test the keywords are:
            effect_size, nobs, alpha, power

        Exactly one needs to be ``None``, all others need numeric values.

        This test can also be used for a paired t-test, where effect size is
        defined in terms of the mean difference, and nobs is the number of
        pairs.

        Parameters
        ----------
        effect_size : float
            standardized effect size, mean divided by the standard deviation.
            effect size has to be positive.
        nobs : int or float
            sample size, number of observations.
        alpha : float in interval (0,1)
            significance level, e.g. 0.05, is the probability of a type I
            error, that is wrong rejections if the Null Hypothesis is true.
        power : float in interval (0,1)
            power of the test, e.g. 0.8, is one minus the probability of a
            type II error. Power is the probability that the test correctly
            rejects the Null Hypothesis if the Alternative Hypothesis is true.
        alternative : str, 'two-sided' (default), 'larger', 'smaller'
            extra argument to choose whether the power is calculated for a
            two-sided (default) or one sided test. The one-sided test can be
            either 'larger', 'smaller'.

        Returns
        -------
        value : float
            The value of the parameter that was set to None in the call. The
            value solves the power equation given the remaining parameters.

        *attaches*

        cache_fit_res : list
            Cache of the result of the root finding procedure for the latest
            call to ``solve_power``, mainly for debugging purposes.
            The first element is the success indicator, one if successful.
            The remaining elements contain the return information of the up to
            three solvers that have been tried.

        Notes
        -----
        The function uses scipy.optimize for finding the value that satisfies
        the power equation. It first uses ``brentq`` with a prior search for
        bounds. If this fails to find a root, ``fsolve`` is used. If ``fsolve``
        also fails, then, for ``alpha``, ``power`` and ``effect_size``,
        ``brentq`` with fixed bounds is used. However, there can still be cases
        where this fails.
        '''
        return super(TTestPower, self).solve_power(effect_size=effect_size,
                                                   nobs=nobs,
                                                   alpha=alpha,
                                                   power=power,
                                                   alternative=alternative)
class TTestIndPower(Power):
    '''Statistical Power calculations for t-test for two independent samples

    currently only uses pooled variance
    '''

    def power(self, effect_size, nobs1, alpha, ratio=1, df=None,
              alternative='two-sided'):
        '''Calculate the power of a t-test for two independent samples.

        Parameters
        ----------
        effect_size : float
            standardized effect size, difference between the two means divided
            by the standard deviation. `effect_size` has to be positive.
        nobs1 : int or float
            number of observations of sample 1. The number of observations of
            sample two is ratio times the size of sample 1,
            i.e. ``nobs2 = nobs1 * ratio``
        alpha : float in interval (0,1)
            significance level, e.g. 0.05, is the probability of a type I
            error, that is wrong rejections if the Null Hypothesis is true.
        ratio : float
            ratio of the number of observations in sample 2 relative to
            sample 1. see description of nobs1
            The default for ratio is 1; to solve for ratio given the other
            arguments, it has to be explicitly set to None.
        df : int or float
            degrees of freedom. By default this is None, and the df from the
            ttest with pooled variance is used, ``df = (nobs1 - 1 + nobs2 - 1)``
        alternative : str, 'two-sided' (default), 'larger', 'smaller'
            extra argument to choose whether the power is calculated for a
            two-sided (default) or one sided test. The one-sided test can be
            either 'larger', 'smaller'.

        Returns
        -------
        power : float
            Power of the test, e.g. 0.8, is one minus the probability of a
            type II error. Power is the probability that the test correctly
            rejects the Null Hypothesis if the Alternative Hypothesis is true.
        '''
        nobs2 = nobs1 * ratio
        # pooled variance
        if df is None:
            df = (nobs1 - 1 + nobs2 - 1)

        # effective sample size: harmonic-mean combination n1*n2/(n1+n2)
        nobs = 1. / (1. / nobs1 + 1. / nobs2)
        return ttest_power(effect_size, nobs, alpha, df=df, alternative=alternative)

    # method is only added to have explicit keywords and docstring
    def solve_power(self, effect_size=None, nobs1=None, alpha=None, power=None,
                    ratio=1., alternative='two-sided'):
        '''solve for any one parameter of the power of a two sample t-test

        for t-test the keywords are:
            effect_size, nobs1, alpha, power, ratio

        exactly one needs to be ``None``, all others need numeric values

        Parameters
        ----------
        effect_size : float
            standardized effect size, difference between the two means divided
            by the standard deviation. `effect_size` has to be positive.
        nobs1 : int or float
            number of observations of sample 1. The number of observations of
            sample two is ratio times the size of sample 1,
            i.e. ``nobs2 = nobs1 * ratio``
        alpha : float in interval (0,1)
            significance level, e.g. 0.05, is the probability of a type I
            error, that is wrong rejections if the Null Hypothesis is true.
        power : float in interval (0,1)
            power of the test, e.g. 0.8, is one minus the probability of a
            type II error. Power is the probability that the test correctly
            rejects the Null Hypothesis if the Alternative Hypothesis is true.
        ratio : float
            ratio of the number of observations in sample 2 relative to
            sample 1. see description of nobs1
            The default for ratio is 1; to solve for ratio given the other
            arguments it has to be explicitly set to None.
        alternative : str, 'two-sided' (default), 'larger', 'smaller'
            extra argument to choose whether the power is calculated for a
            two-sided (default) or one sided test. The one-sided test can be
            either 'larger', 'smaller'.

        Returns
        -------
        value : float
            The value of the parameter that was set to None in the call. The
            value solves the power equation given the remaining parameters.

        Notes
        -----
        The function uses scipy.optimize for finding the value that satisfies
        the power equation. It first uses ``brentq`` with a prior search for
        bounds. If this fails to find a root, ``fsolve`` is used. If ``fsolve``
        also fails, then, for ``alpha``, ``power`` and ``effect_size``,
        ``brentq`` with fixed bounds is used. However, there can still be cases
        where this fails.
        '''
        return super(TTestIndPower, self).solve_power(effect_size=effect_size,
                                                      nobs1=nobs1,
                                                      alpha=alpha,
                                                      power=power,
                                                      ratio=ratio,
                                                      alternative=alternative)
class NormalIndPower(Power):
    '''Statistical Power calculations for z-test for two independent samples.

    currently only uses pooled variance
    '''

    def __init__(self, ddof=0, **kwds):
        # ddof: degrees of freedom correction subtracted from each sample
        # size (for correlation, ddof=3)
        self.ddof = ddof
        super(NormalIndPower, self).__init__(**kwds)

    def power(self, effect_size, nobs1, alpha, ratio=1,
              alternative='two-sided'):
        '''Calculate the power of a z-test for two independent samples.

        Parameters
        ----------
        effect_size : float
            standardized effect size, difference between the two means divided
            by the standard deviation. effect size has to be positive.
        nobs1 : int or float
            number of observations of sample 1. The number of observations of
            sample two is ratio times the size of sample 1,
            i.e. ``nobs2 = nobs1 * ratio``
            ``ratio`` can be set to zero in order to get the power for a
            one sample test.
        alpha : float in interval (0,1)
            significance level, e.g. 0.05, is the probability of a type I
            error, that is wrong rejections if the Null Hypothesis is true.
        ratio : float
            ratio of the number of observations in sample 2 relative to
            sample 1. see description of nobs1
        alternative : str, 'two-sided' (default), 'larger', 'smaller'
            extra argument to choose whether the power is calculated for a
            two-sided (default) or one sided test. The one-sided test can be
            either 'larger', 'smaller'.

        Returns
        -------
        power : float
            Power of the test, e.g. 0.8, is one minus the probability of a
            type II error. Power is the probability that the test correctly
            rejects the Null Hypothesis if the Alternative Hypothesis is true.
        '''
        ddof = self.ddof  # for correlation, ddof=3

        # get effective nobs, factor for std of test statistic
        if ratio > 0:
            nobs2 = nobs1 * ratio
            # equivalent to nobs = n1*n2/(n1+n2) = n1*ratio/(1+ratio)
            nobs = 1. / (1. / (nobs1 - ddof) + 1. / (nobs2 - ddof))
        else:
            # ratio == 0 gives the one sample test
            nobs = nobs1 - ddof
        return normal_power(effect_size, nobs, alpha, alternative=alternative)

    # method is only added to have explicit keywords and docstring
    def solve_power(self, effect_size=None, nobs1=None, alpha=None, power=None,
                    ratio=1., alternative='two-sided'):
        '''solve for any one parameter of the power of a two sample z-test

        for z-test the keywords are:
            effect_size, nobs1, alpha, power, ratio

        exactly one needs to be ``None``, all others need numeric values

        Parameters
        ----------
        effect_size : float
            standardized effect size, difference between the two means divided
            by the standard deviation.
            If ratio=0, then this is the standardized mean in the one sample
            test.
        nobs1 : int or float
            number of observations of sample 1. The number of observations of
            sample two is ratio times the size of sample 1,
            i.e. ``nobs2 = nobs1 * ratio``
            ``ratio`` can be set to zero in order to get the power for a
            one sample test.
        alpha : float in interval (0,1)
            significance level, e.g. 0.05, is the probability of a type I
            error, that is wrong rejections if the Null Hypothesis is true.
        power : float in interval (0,1)
            power of the test, e.g. 0.8, is one minus the probability of a
            type II error. Power is the probability that the test correctly
            rejects the Null Hypothesis if the Alternative Hypothesis is true.
        ratio : float
            ratio of the number of observations in sample 2 relative to
            sample 1. see description of nobs1
            The default for ratio is 1; to solve for ratio given the other
            arguments it has to be explicitly set to None.
        alternative : str, 'two-sided' (default), 'larger', 'smaller'
            extra argument to choose whether the power is calculated for a
            two-sided (default) or one sided test. The one-sided test can be
            either 'larger', 'smaller'.

        Returns
        -------
        value : float
            The value of the parameter that was set to None in the call. The
            value solves the power equation given the remaining parameters.

        Notes
        -----
        The function uses scipy.optimize for finding the value that satisfies
        the power equation. It first uses ``brentq`` with a prior search for
        bounds. If this fails to find a root, ``fsolve`` is used. If ``fsolve``
        also fails, then, for ``alpha``, ``power`` and ``effect_size``,
        ``brentq`` with fixed bounds is used. However, there can still be cases
        where this fails.
        '''
        return super(NormalIndPower, self).solve_power(effect_size=effect_size,
                                                       nobs1=nobs1,
                                                       alpha=alpha,
                                                       power=power,
                                                       ratio=ratio,
                                                       alternative=alternative)
class FTestPower(Power):
    '''Statistical Power calculations for generic F-test
    '''

    def power(self, effect_size, df_num, df_denom, alpha, ncc=1):
        '''Calculate the power of a F-test.

        Parameters
        ----------
        effect_size : float
            standardized effect size, mean divided by the standard deviation.
            effect size has to be positive.
        df_num : int or float
            numerator degrees of freedom.
        df_denom : int or float
            denominator degrees of freedom.
        alpha : float in interval (0,1)
            significance level, e.g. 0.05, is the probability of a type I
            error, that is wrong rejections if the Null Hypothesis is true.
        ncc : int
            degrees of freedom correction for non-centrality parameter.
            see Notes

        Returns
        -------
        power : float
            Power of the test, e.g. 0.8, is one minus the probability of a
            type II error. Power is the probability that the test correctly
            rejects the Null Hypothesis if the Alternative Hypothesis is true.

        Notes
        -----
        sample size is given implicitly by df_num

        set ncc=0 to match t-test, or f-test in LikelihoodModelResults.
        ncc=1 matches the non-centrality parameter in R::pwr::pwr.f2.test

        ftest_power with ncc=0 should also be correct for f_test in regression
        models, with df_num and d_denom as defined there. (not verified yet)
        '''
        # delegate to the module-level function
        pow_ = ftest_power(effect_size, df_num, df_denom, alpha, ncc=ncc)
        return pow_

    # method is only added to have explicit keywords and docstring
    def solve_power(self, effect_size=None, df_num=None, df_denom=None,
                    nobs=None, alpha=None, power=None, ncc=1):
        '''solve for any one parameter of the power of a F-test

        for the F-test the keywords are:
            effect_size, df_num, df_denom, alpha, power

        Exactly one needs to be ``None``, all others need numeric values.

        Parameters
        ----------
        effect_size : float
            standardized effect size, mean divided by the standard deviation.
            effect size has to be positive.
        df_num : int or float
            numerator degrees of freedom.
        df_denom : int or float
            denominator degrees of freedom.
        nobs : None
            accepted but ignored; it is not forwarded to the solver. Kept
            only for backward compatibility of the signature.
        alpha : float in interval (0,1)
            significance level, e.g. 0.05, is the probability of a type I
            error, that is wrong rejections if the Null Hypothesis is true.
        power : float in interval (0,1)
            power of the test, e.g. 0.8, is one minus the probability of a
            type II error. Power is the probability that the test correctly
            rejects the Null Hypothesis if the Alternative Hypothesis is true.
        ncc : int
            degrees of freedom correction for non-centrality parameter.
            see Notes of ``power``.

        Returns
        -------
        value : float
            The value of the parameter that was set to None in the call. The
            value solves the power equation given the remaining parameters.

        Notes
        -----
        The function uses scipy.optimize for finding the value that satisfies
        the power equation. It first uses ``brentq`` with a prior search for
        bounds. If this fails to find a root, ``fsolve`` is used. If ``fsolve``
        also fails, then, for ``alpha``, ``power`` and ``effect_size``,
        ``brentq`` with fixed bounds is used. However, there can still be cases
        where this fails.
        '''
        # NOTE(review): ``nobs`` is deliberately not forwarded (original
        # behavior); sample size enters only implicitly through df_num.
        return super(FTestPower, self).solve_power(effect_size=effect_size,
                                                   df_num=df_num,
                                                   df_denom=df_denom,
                                                   alpha=alpha,
                                                   power=power,
                                                   ncc=ncc)
class FTestAnovaPower(Power):
    '''Statistical Power calculations F-test for one factor balanced ANOVA
    '''

    def power(self, effect_size, nobs, alpha, k_groups=2):
        '''Calculate the power of a F-test for one factor ANOVA.

        Parameters
        ----------
        effect_size : float
            standardized effect size, mean divided by the standard deviation.
            effect size has to be positive.
        nobs : int or float
            sample size, number of observations.
        alpha : float in interval (0,1)
            significance level, e.g. 0.05, is the probability of a type I
            error, that is wrong rejections if the Null Hypothesis is true.
        k_groups : int or float
            number of groups in the ANOVA or k-sample comparison. Default is 2.

        Returns
        -------
        power : float
            Power of the test, e.g. 0.8, is one minus the probability of a
            type II error. Power is the probability that the test correctly
            rejects the Null Hypothesis if the Alternative Hypothesis is true.
        '''
        # delegate to the module-level function
        return ftest_anova_power(effect_size, nobs, alpha, k_groups=k_groups)

    # method is only added to have explicit keywords and docstring
    def solve_power(self, effect_size=None, nobs=None, alpha=None, power=None,
                    k_groups=2):
        '''solve for any one parameter of the power of a F-test

        for the one way ANOVA F-test the keywords are:
            effect_size, nobs, alpha, power

        Exactly one needs to be ``None``, all others need numeric values.

        Parameters
        ----------
        effect_size : float
            standardized effect size, mean divided by the standard deviation.
            effect size has to be positive.
        nobs : int or float
            sample size, number of observations.
        alpha : float in interval (0,1)
            significance level, e.g. 0.05, is the probability of a type I
            error, that is wrong rejections if the Null Hypothesis is true.
        power : float in interval (0,1)
            power of the test, e.g. 0.8, is one minus the probability of a
            type II error. Power is the probability that the test correctly
            rejects the Null Hypothesis if the Alternative Hypothesis is true.
        k_groups : int or float
            number of groups in the ANOVA or k-sample comparison. Default is 2.

        Returns
        -------
        value : float
            The value of the parameter that was set to None in the call. The
            value solves the power equation given the remaining parameters.

        Notes
        -----
        The function uses scipy.optimize for finding the value that satisfies
        the power equation. It first uses ``brentq`` with a prior search for
        bounds. If this fails to find a root, ``fsolve`` is used. If ``fsolve``
        also fails, then, for ``alpha``, ``power`` and ``effect_size``,
        ``brentq`` with fixed bounds is used. However, there can still be cases
        where this fails.
        '''
        # update start values for root finding so nobs scales with k_groups
        if k_groups is not None:
            self.start_ttp['nobs'] = k_groups * 10
            self.start_bqexp['nobs'] = dict(low=k_groups * 2,
                                            start_upp=k_groups * 10)
        # first attempt at special casing
        if effect_size is None:
            return self._solve_effect_size(effect_size=effect_size,
                                           nobs=nobs,
                                           alpha=alpha,
                                           k_groups=k_groups,
                                           power=power)

        return super(FTestAnovaPower, self).solve_power(effect_size=effect_size,
                                                        nobs=nobs,
                                                        alpha=alpha,
                                                        k_groups=k_groups,
                                                        power=power)

    def _solve_effect_size(self, effect_size=None, nobs=None, alpha=None,
                           power=None, k_groups=2):
        '''experimental, test failure in solve_power for effect_size

        Uses plain brentq on (0, 1) instead of the generic multi-solver
        machinery of the base class.
        '''
        def func(x):
            effect_size = x
            return self._power_identity(effect_size=effect_size,
                                        nobs=nobs,
                                        alpha=alpha,
                                        k_groups=k_groups,
                                        power=power)

        val, r = optimize.brentq(func, 1e-8, 1 - 1e-8, full_output=True)
        if not r.converged:
            print(r)
        return val
class GofChisquarePower(Power):
    '''Statistical Power calculations for one sample chisquare test

    '''

    def power(self, effect_size, nobs, alpha, n_bins, ddof=0):#alternative='two-sided'):
        '''Calculate the power of a chisquare test for one sample

        Only two-sided alternative is implemented

        Parameters
        ----------
        effect_size : float
            standardized effect size, according to Cohen's definition.
            see :func:`statsmodels.stats.gof.chisquare_effectsize`
        nobs : int or float
            sample size, number of observations.
        alpha : float in interval (0,1)
            significance level, e.g. 0.05, is the probability of a type I
            error, that is wrong rejections if the Null Hypothesis is true.
        n_bins : int
            number of bins or cells in the distribution.
        ddof : int
            delta degrees of freedom passed through to ``chisquare_power``.
            Default is 0.

        Returns
        -------
        power : float
            Power of the test, e.g. 0.8, is one minus the probability of a
            type II error. Power is the probability that the test correctly
            rejects the Null Hypothesis if the Alternative Hypothesis is true.

        '''
        from statsmodels.stats.gof import chisquare_power
        # BUG FIX: ``ddof`` was previously hard-coded as ``ddof=0`` in this
        # call, so the caller's value was silently ignored. Forward it.
        return chisquare_power(effect_size, nobs, n_bins, alpha, ddof=ddof)

    #method is only added to have explicit keywords and docstring
    def solve_power(self, effect_size=None, nobs=None, alpha=None,
                    power=None, n_bins=2):
        '''solve for any one parameter of the power of a one sample chisquare-test

        for the one sample chisquare-test the keywords are:
        effect_size, nobs, alpha, power

        Exactly one needs to be ``None``, all others need numeric values.

        n_bins needs to be defined, a default=2 is used.

        Parameters
        ----------
        effect_size : float
            standardized effect size, according to Cohen's definition.
            see :func:`statsmodels.stats.gof.chisquare_effectsize`
        nobs : int or float
            sample size, number of observations.
        alpha : float in interval (0,1)
            significance level, e.g. 0.05, is the probability of a type I
            error, that is wrong rejections if the Null Hypothesis is true.
        power : float in interval (0,1)
            power of the test, e.g. 0.8, is one minus the probability of a
            type II error. Power is the probability that the test correctly
            rejects the Null Hypothesis if the Alternative Hypothesis is true.
        n_bins : int
            number of bins or cells in the distribution

        Returns
        -------
        value : float
            The value of the parameter that was set to None in the call. The
            value solves the power equation given the remaining parameters.

        Notes
        -----
        The function uses scipy.optimize for finding the value that satisfies
        the power equation. It first uses ``brentq`` with a prior search for
        bounds. If this fails to find a root, ``fsolve`` is used. If ``fsolve``
        also fails, then, for ``alpha``, ``power`` and ``effect_size``,
        ``brentq`` with fixed bounds is used. However, there can still be cases
        where this fails.

        '''
        # delegate to the generic solver of the base class; n_bins is a
        # fixed extra keyword, not a solvable parameter
        return super(GofChisquarePower, self).solve_power(effect_size=effect_size,
                                                          nobs=nobs,
                                                          n_bins=n_bins,
                                                          alpha=alpha,
                                                          power=power)
class _GofChisquareIndPower(Power):
    '''Statistical Power calculations for chisquare goodness-of-fit test

    TODO: this is not working yet
          for 2sample case need two nobs in function
          no one-sided chisquare test, is there one? use normal distribution?
          -> drop one-sided options?
    '''

    def power(self, effect_size, nobs1, alpha, ratio=1,
              alternative='two-sided'):
        '''Calculate the power of a chisquare for two independent sample

        Parameters
        ----------
        effect_size : float
            standardize effect size, difference between the two means divided
            by the standard deviation. effect size has to be positive.
        nobs1 : int or float
            number of observations of sample 1. The number of observations of
            sample two is ratio times the size of sample 1,
            i.e. ``nobs2 = nobs1 * ratio``
        alpha : float in interval (0,1)
            significance level, e.g. 0.05, is the probability of a type I
            error, that is wrong rejections if the Null Hypothesis is true.
        ratio : float
            ratio of the number of observations in sample 2 relative to
            sample 1. see description of nobs1
            The default for ratio is 1; to solve for ratio given the other
            arguments it has to be explicitly set to None.
        alternative : str, 'two-sided' (default) or 'one-sided'
            extra argument to choose whether the power is calculated for a
            two-sided (default) or one sided test.
            'one-sided' assumes we are in the relevant tail.
            NOTE(review): ``alternative`` is accepted but never used in the
            body below.

        Returns
        -------
        power : float
            Power of the test, e.g. 0.8, is one minus the probability of a
            type II error. Power is the probability that the test correctly
            rejects the Null Hypothesis if the Alternative Hypothesis is true.

        '''

        from statsmodels.stats.gof import chisquare_power
        nobs2 = nobs1*ratio
        # harmonic combination of the two sample sizes:
        #equivalent to nobs = n1*n2/(n1+n2)=n1*ratio/(1+ratio)
        nobs = 1./ (1. / nobs1 + 1. / nobs2)
        # NOTE(review): ``chisquare_power`` takes (effect_size, nobs, n_bins,
        # alpha, ddof=0); here ``alpha`` is passed positionally into the
        # ``n_bins`` slot and no significance level is supplied -- consistent
        # with the class-level "not working yet" TODO. Confirm intended
        # semantics before promoting this private class.
        return chisquare_power(effect_size, nobs, alpha)

    #method is only added to have explicit keywords and docstring
    def solve_power(self, effect_size=None, nobs1=None, alpha=None, power=None,
                    ratio=1., alternative='two-sided'):
        '''solve for any one parameter of the power of a two sample chisquare-test

        for the test the keywords are:
        effect_size, nobs1, alpha, power, ratio

        exactly one needs to be ``None``, all others need numeric values

        Parameters
        ----------
        effect_size : float
            standardize effect size, difference between the two means divided
            by the standard deviation.
        nobs1 : int or float
            number of observations of sample 1. The number of observations of
            sample two is ratio times the size of sample 1,
            i.e. ``nobs2 = nobs1 * ratio``
        alpha : float in interval (0,1)
            significance level, e.g. 0.05, is the probability of a type I
            error, that is wrong rejections if the Null Hypothesis is true.
        power : float in interval (0,1)
            power of the test, e.g. 0.8, is one minus the probability of a
            type II error. Power is the probability that the test correctly
            rejects the Null Hypothesis if the Alternative Hypothesis is true.
        ratio : float
            ratio of the number of observations in sample 2 relative to
            sample 1. see description of nobs1
            The default for ratio is 1; to solve for ratio given the other
            arguments it has to be explicitly set to None.
        alternative : str, 'two-sided' (default) or 'one-sided'
            extra argument to choose whether the power is calculated for a
            two-sided (default) or one sided test.
            'one-sided' assumes we are in the relevant tail.

        Returns
        -------
        value : float
            The value of the parameter that was set to None in the call. The
            value solves the power equation given the remaining parameters.

        Notes
        -----
        The function uses scipy.optimize for finding the value that satisfies
        the power equation. It first uses ``brentq`` with a prior search for
        bounds. If this fails to find a root, ``fsolve`` is used. If ``fsolve``
        also fails, then, for ``alpha``, ``power`` and ``effect_size``,
        ``brentq`` with fixed bounds is used. However, there can still be cases
        where this fails.

        '''
        # delegate to the generic solver of the base class
        return super(_GofChisquareIndPower, self).solve_power(effect_size=effect_size,
                                                              nobs1=nobs1,
                                                              alpha=alpha,
                                                              power=power,
                                                              ratio=ratio,
                                                              alternative=alternative)
#shortcut functions
# Module-level conveniences: bound ``solve_power`` methods of throwaway
# instances, so callers can solve a power equation without instantiating
# the corresponding class themselves.
tt_solve_power = TTestPower().solve_power
tt_ind_solve_power = TTestIndPower().solve_power
zt_ind_solve_power = NormalIndPower().solve_power