Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/stats/weightstats.py : 15%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
'''Ttests and descriptive statistics with weights

Created on 2010-09-18

Author: josef-pktd
License: BSD (3-clause)

References
----------
SPSS manual
SAS manual

This follows in large parts the SPSS manual, which is largely the same as
the SAS manual with different, simpler notation.

Freq and Weight in SAS seem redundant since they always show up as a product;
SPSS has only weights.

Notes
-----
This has potential problems with ddof. I started to follow numpy with ddof=0
by default and users can change it, but this might still mess up the t-tests,
since the estimates for the standard deviation will be based on the ddof that
the user chooses.
- fixed ddof for the meandiff ttest, now matches scipy.stats.ttest_ind

Note: scipy now has a separate, pooled variance option in ttest, but I have not
compared it yet.

'''
36import numpy as np
37from scipy import stats
39from statsmodels.tools.decorators import cache_readonly
class DescrStatsW(object):
    '''Descriptive statistics and tests with weights for case weights.

    Assumes that the data is 1d or 2d with (nobs, nvars) observations in rows,
    variables in columns, and that the same weight applies to each column.

    If degrees of freedom correction is used, then weights should add up to the
    number of observations. ttest also assumes that the sum of weights
    corresponds to the sample size.

    This is essentially the same as replicating each observation by its
    weight, if the weights are integers, often called case or frequency weights.

    Parameters
    ----------
    data : array_like, 1-D or 2-D
        dataset
    weights : None or 1-D ndarray
        weights for each observation, with same length as zero axis of data.
        If None, every observation gets weight 1.
    ddof : int
        default ddof=0, degrees of freedom correction used for second moments,
        var, std, cov, corrcoef.
        However, statistical tests are independent of `ddof`, based on the
        standard formulas.

    Examples
    --------

    >>> import numpy as np
    >>> np.random.seed(0)
    >>> x1_2d = 1.0 + np.random.randn(20, 3)
    >>> w1 = np.random.randint(1, 4, 20)
    >>> d1 = DescrStatsW(x1_2d, weights=w1)
    >>> d1.mean
    array([ 1.42739844,  1.23174284,  1.083753  ])
    >>> d1.var
    array([ 0.94855633,  0.52074626,  1.12309325])
    >>> d1.std_mean
    array([ 0.14682676,  0.10878944,  0.15976497])

    >>> tstat, pval, df = d1.ttest_mean(0)
    >>> tstat; pval; df
    array([  9.72165021,  11.32226471,   6.78342055])
    array([  1.58414212e-12,   1.26536887e-14,   2.37623126e-08])
    44.0

    >>> tstat, pval, df = d1.ttest_mean([0, 1, 1])
    >>> tstat; pval; df
    array([ 9.72165021,  2.13019609,  0.52422632])
    array([  1.58414212e-12,   3.87842808e-02,   6.02752170e-01])
    44.0

    #if weights are integers, then asrepeats can be used

    >>> x1r = d1.asrepeats()
    >>> x1r.shape
    ...
    >>> stats.ttest_1samp(x1r, [0, 1, 1])
    ...

    '''

    def __init__(self, data, weights=None, ddof=0):
        self.data = np.asarray(data)
        if weights is None:
            self.weights = np.ones(self.data.shape[0])
        else:
            # TODO: why squeeze?
            self.weights = np.asarray(weights).squeeze().astype(float)
        self.ddof = ddof

    @cache_readonly
    def sum_weights(self):
        """Sum of weights"""
        return self.weights.sum(0)

    @cache_readonly
    def nobs(self):
        '''alias for number of observations/cases, equal to sum of weights
        '''
        return self.sum_weights

    @cache_readonly
    def sum(self):
        '''weighted sum of data'''
        return np.dot(self.data.T, self.weights)

    @cache_readonly
    def mean(self):
        '''weighted mean of data'''
        return self.sum / self.sum_weights

    @cache_readonly
    def demeaned(self):
        '''data with weighted mean subtracted'''
        return self.data - self.mean

    @cache_readonly
    def sumsquares(self):
        '''weighted sum of squares of demeaned data'''
        return np.dot((self.demeaned**2).T, self.weights)

    # need memoize instead of cache decorator
    def var_ddof(self, ddof=0):
        '''variance of data given ddof

        Parameters
        ----------
        ddof : int, float
            degrees of freedom correction, independent of attribute ddof

        Returns
        -------
        var : float, ndarray
            variance with denominator ``sum_weights - ddof``
        '''
        return self.sumsquares / (self.sum_weights - ddof)

    def std_ddof(self, ddof=0):
        '''standard deviation of data with given ddof

        Parameters
        ----------
        ddof : int, float
            degrees of freedom correction, independent of attribute ddof

        Returns
        -------
        std : float, ndarray
            standard deviation with denominator ``sum_weights - ddof``
        '''
        return np.sqrt(self.var_ddof(ddof=ddof))

    @cache_readonly
    def var(self):
        '''variance with default degrees of freedom correction
        '''
        return self.sumsquares / (self.sum_weights - self.ddof)

    @cache_readonly
    def _var(self):
        '''variance without degrees of freedom correction

        used for statistical tests with controlled ddof
        '''
        return self.sumsquares / self.sum_weights

    @cache_readonly
    def std(self):
        '''standard deviation with default degrees of freedom correction
        '''
        return np.sqrt(self.var)

    @cache_readonly
    def cov(self):
        '''weighted covariance of data if data is 2 dimensional

        assumes variables in columns and observations in rows
        uses default ddof
        '''
        cov_ = np.dot(self.weights * self.demeaned.T, self.demeaned)
        cov_ /= (self.sum_weights - self.ddof)
        return cov_

    @cache_readonly
    def corrcoef(self):
        '''weighted correlation with default ddof

        assumes variables in columns and observations in rows
        '''
        return self.cov / self.std / self.std[:, None]

    @cache_readonly
    def std_mean(self):
        '''standard deviation of weighted mean

        The result always uses ``sum_weights - 1`` in the denominator; a
        user-chosen ``ddof`` is undone first so it does not leak into tests.
        '''
        std = self.std
        if self.ddof != 0:
            # ddof correction, (need copy of std)
            std = std * np.sqrt((self.sum_weights - self.ddof)
                                / self.sum_weights)

        return std / np.sqrt(self.sum_weights - 1)

    def quantile(self, probs, return_pandas=True):
        """
        Compute quantiles for a weighted sample.

        Parameters
        ----------
        probs : array_like
            A vector of probability points at which to calculate the
            quantiles.  Each element of `probs` should fall in [0, 1].
        return_pandas : bool
            If True, return value is a Pandas DataFrame or Series.
            Otherwise returns a ndarray.

        Returns
        -------
        quantiles : Series, DataFrame, or ndarray
            If `return_pandas` = True, returns one of the following:
              * data are 1d, `return_pandas` = True: a Series indexed by
                the probability points.
              * data are 2d, `return_pandas` = True: a DataFrame with
                the probability points as row index and the variables
                as column index.

            If `return_pandas` = False, returns an ndarray containing the
            same values as the Series/DataFrame.

        Notes
        -----
        To compute the quantiles, first, the weights are summed over
        exact ties yielding distinct data values y_1 < y_2 < ..., and
        corresponding weights w_1, w_2, ....  Let s_j denote the sum
        of the first j weights, and let W denote the sum of all the
        weights.  For a probability point p, if pW falls strictly
        between s_j and s_{j+1} then the estimated quantile is
        y_{j+1}.  If pW = s_j then the estimated quantile is (y_j +
        y_{j+1})/2.  If pW < s_1 then the estimated quantile is y_1.

        References
        ----------
        SAS documentation for weighted quantiles:

        https://support.sas.com/documentation/cdl/en/procstat/63104/HTML/default/viewer.htm#procstat_univariate_sect028.htm
        """

        import pandas as pd

        probs = np.asarray(probs)
        probs = np.atleast_1d(probs)

        if self.data.ndim == 1:
            rslt = self._quantile(self.data, probs)
            if return_pandas:
                rslt = pd.Series(rslt, index=probs)
        else:
            # one column of quantiles per variable
            rslt = []
            for vec in self.data.T:
                rslt.append(self._quantile(vec, probs))
            rslt = np.column_stack(rslt)
            if return_pandas:
                columns = ["col%d" % (j+1) for j in range(rslt.shape[1])]
                rslt = pd.DataFrame(data=rslt, columns=columns, index=probs)

        if return_pandas:
            rslt.index.name = "p"

        return rslt

    def _quantile(self, vec, probs):
        # Helper function to calculate weighted quantiles for one column.
        # Follows definition from SAS documentation.
        # Returns a floating point ndarray with one entry per element of
        # ``probs``.

        import pandas as pd

        # Aggregate over ties
        df = pd.DataFrame(index=np.arange(len(self.weights)))
        df["weights"] = self.weights
        df["vec"] = vec
        dfg = df.groupby("vec").sum()
        weights = dfg.values[:, 0]
        values = np.asarray(dfg.index)

        cweights = np.cumsum(weights)
        totwt = cweights[-1]
        targets = probs * totwt
        ii = np.searchsorted(cweights, targets)

        rslt = values[ii]
        if not np.issubdtype(rslt.dtype, np.inexact):
            # Integer (or bool) data: the midpoint written below for exact
            # hits can be fractional and would be truncated on assignment.
            rslt = rslt.astype(float)

        # Exact hits: pW == s_j, use midpoint of y_j and y_{j+1} unless y_j
        # is already the largest value.
        jj = np.flatnonzero(np.abs(targets - cweights[ii]) < 1e-10)
        jj = jj[ii[jj] < len(cweights) - 1]
        rslt[jj] = (values[ii[jj]] + values[ii[jj]+1]) / 2

        return rslt

    def tconfint_mean(self, alpha=0.05, alternative='two-sided'):
        '''confidence interval for weighted mean of data, based on t

        If the data is 2d, then these are separate confidence intervals
        for each column.

        Parameters
        ----------
        alpha : float
            significance level for the confidence interval, coverage is
            ``1-alpha``
        alternative : str
            This specifies the alternative hypothesis for the test that
            corresponds to the confidence interval.
            The alternative hypothesis, H1, has to be one of the following

              'two-sided': H1: mean not equal to value (default)
              'larger' :   H1: mean larger than value
              'smaller' :  H1: mean smaller than value

        Returns
        -------
        lower, upper : floats or ndarrays
            lower and upper bound of confidence interval

        Notes
        -----
        In a previous version, statsmodels 0.4, alpha was the confidence
        level, e.g. 0.95
        '''
        # TODO: add asymmetric
        dof = self.sum_weights - 1
        ci = _tconfint_generic(self.mean, self.std_mean, dof, alpha,
                               alternative)
        return ci

    def zconfint_mean(self, alpha=0.05, alternative='two-sided'):
        '''confidence interval for weighted mean of data, based on normal

        Confidence interval is based on normal distribution.
        If the data is 2d, then these are separate confidence intervals
        for each column.

        Parameters
        ----------
        alpha : float
            significance level for the confidence interval, coverage is
            ``1-alpha``
        alternative : str
            This specifies the alternative hypothesis for the test that
            corresponds to the confidence interval.
            The alternative hypothesis, H1, has to be one of the following

              'two-sided': H1: mean not equal to value (default)
              'larger' :   H1: mean larger than value
              'smaller' :  H1: mean smaller than value

        Returns
        -------
        lower, upper : floats or ndarrays
            lower and upper bound of confidence interval

        Notes
        -----
        In a previous version, statsmodels 0.4, alpha was the confidence
        level, e.g. 0.95
        '''

        return _zconfint_generic(self.mean, self.std_mean, alpha, alternative)

    def ttest_mean(self, value=0, alternative='two-sided'):
        '''ttest of Null hypothesis that mean is equal to value.

        The alternative hypothesis H1 is defined by the following
        'two-sided': H1: mean not equal to value
        'larger' :   H1: mean larger than value
        'smaller' :  H1: mean smaller than value

        Parameters
        ----------
        value : float or array
            the hypothesized value for the mean
        alternative : str
            The alternative hypothesis, H1, has to be one of the following

              'two-sided': H1: mean not equal to value (default)
              'larger' :   H1: mean larger than value
              'smaller' :  H1: mean smaller than value

        Returns
        -------
        tstat : float
            test statistic
        pvalue : float
            pvalue of the t-test
        df : int or float
            degrees of freedom used in the t-test, ``sum_weights - 1``

        Raises
        ------
        ValueError
            if `alternative` is not one of the supported options

        '''
        # TODO: check direction with R, smaller=less, larger=greater
        dof = self.sum_weights - 1
        # Delegating to the shared helper also validates ``alternative``.
        tstat, pvalue = _tstat_generic(self.mean, value, self.std_mean, dof,
                                       alternative)
        return tstat, pvalue, dof

    def ttost_mean(self, low, upp):
        '''test of (non-)equivalence of one sample

        TOST: two one-sided t tests

        null hypothesis:  m < low or m > upp
        alternative hypothesis:  low < m < upp

        where m is the expected value of the sample (mean of the population).

        If the pvalue is smaller than a threshold, say 0.05, then we reject the
        hypothesis that the expected value of the sample (mean of the
        population) is outside of the interval given by thresholds low and upp.

        Parameters
        ----------
        low, upp : float
            equivalence interval low < mean < upp

        Returns
        -------
        pvalue : float
            pvalue of the non-equivalence test
        t1, pv1, df1 : tuple
            test statistic, pvalue and degrees of freedom for lower threshold
            test
        t2, pv2, df2 : tuple
            test statistic, pvalue and degrees of freedom for upper threshold
            test

        '''

        t1, pv1, df1 = self.ttest_mean(low, alternative='larger')
        t2, pv2, df2 = self.ttest_mean(upp, alternative='smaller')
        return np.maximum(pv1, pv2), (t1, pv1, df1), (t2, pv2, df2)

    def ztest_mean(self, value=0, alternative='two-sided'):
        '''z-test of Null hypothesis that mean is equal to value.

        The alternative hypothesis H1 is defined by the following
        'two-sided': H1: mean not equal to value
        'larger' :   H1: mean larger than value
        'smaller' :  H1: mean smaller than value

        Parameters
        ----------
        value : float or array
            the hypothesized value for the mean
        alternative : str
            The alternative hypothesis, H1, has to be one of the following

              'two-sided': H1: mean not equal to value (default)
              'larger' :   H1: mean larger than value
              'smaller' :  H1: mean smaller than value

        Returns
        -------
        zstat : float
            test statistic
        pvalue : float
            pvalue of the z-test

        Raises
        ------
        ValueError
            if `alternative` is not one of the supported options

        Notes
        -----
        This uses the same degrees of freedom correction as the t-test in the
        calculation of the standard error of the mean, i.e it uses
        `(sum_weights - 1)` instead of `sum_weights` in the denominator.
        See Examples below for the difference.

        Examples
        --------

        z-test on a proportion, with 20 observations, 15 of those are our event

        >>> import statsmodels.api as sm
        >>> x1 = [0, 1]
        >>> w1 = [5, 15]
        >>> d1 = sm.stats.DescrStatsW(x1, w1)
        >>> d1.ztest_mean(0.5)
        (2.5166114784235836, 0.011848940928347452)

        This differs from the proportions_ztest because of the degrees of
        freedom correction:

        >>> sm.stats.proportions_ztest(15, 20.0, value=0.5)
        (2.5819888974716112, 0.009823274507519247)

        We can replicate the results from ``proportions_ztest`` if we increase
        the weights to have artificially one more observation:

        >>> sm.stats.DescrStatsW(x1, np.array(w1)*21./20).ztest_mean(0.5)
        (2.5819888974716116, 0.0098232745075192366)
        '''
        # Delegating to the shared helper also validates ``alternative``.
        zstat, pvalue = _zstat_generic(self.mean, value, self.std_mean,
                                       alternative)
        return zstat, pvalue

    def ztost_mean(self, low, upp):
        '''test of (non-)equivalence of one sample, based on z-test

        TOST: two one-sided z-tests

        null hypothesis:  m < low or m > upp
        alternative hypothesis:  low < m < upp

        where m is the expected value of the sample (mean of the population).

        If the pvalue is smaller than a threshold, say 0.05, then we reject the
        hypothesis that the expected value of the sample (mean of the
        population) is outside of the interval given by thresholds low and upp.

        Parameters
        ----------
        low, upp : float
            equivalence interval low < mean < upp

        Returns
        -------
        pvalue : float
            pvalue of the non-equivalence test
        t1, pv1 : tuple
            test statistic and p-value for lower threshold test
        t2, pv2 : tuple
            test statistic and p-value for upper threshold test

        '''

        t1, pv1 = self.ztest_mean(low, alternative='larger')
        t2, pv2 = self.ztest_mean(upp, alternative='smaller')
        return np.maximum(pv1, pv2), (t1, pv1), (t2, pv2)

    def get_compare(self, other, weights=None):
        '''return an instance of CompareMeans with self and other

        Parameters
        ----------
        other : array_like or instance of DescrStatsW
            If array_like then this creates an instance of DescrStatsW with
            the given weights.
        weights : None or array
            weights are only used if other is not an instance of DescrStatsW

        Returns
        -------
        cm : instance of CompareMeans
            the instance has self attached as d1 and other as d2.

        See Also
        --------
        CompareMeans

        '''
        if not isinstance(other, self.__class__):
            d2 = DescrStatsW(other, weights)
        else:
            d2 = other
        return CompareMeans(self, d2)

    def asrepeats(self):
        '''get array that has repeats given by floor(weights)

        observations with weight=0 are dropped

        '''
        w_int = np.floor(self.weights).astype(int)
        return np.repeat(self.data, w_int, axis=0)
614def _tstat_generic(value1, value2, std_diff, dof, alternative, diff=0):
615 '''generic ttest to save typing'''
617 tstat = (value1 - value2 - diff) / std_diff
618 if alternative in ['two-sided', '2-sided', '2s']:
619 pvalue = stats.t.sf(np.abs(tstat), dof)*2
620 elif alternative in ['larger', 'l']:
621 pvalue = stats.t.sf(tstat, dof)
622 elif alternative in ['smaller', 's']:
623 pvalue = stats.t.cdf(tstat, dof)
624 else:
625 raise ValueError('invalid alternative')
626 return tstat, pvalue
628def _tconfint_generic(mean, std_mean, dof, alpha, alternative):
629 '''generic t-confint to save typing'''
631 if alternative in ['two-sided', '2-sided', '2s']:
632 tcrit = stats.t.ppf(1 - alpha / 2., dof)
633 lower = mean - tcrit * std_mean
634 upper = mean + tcrit * std_mean
635 elif alternative in ['larger', 'l']:
636 tcrit = stats.t.ppf(alpha, dof)
637 lower = mean + tcrit * std_mean
638 upper = np.inf
639 elif alternative in ['smaller', 's']:
640 tcrit = stats.t.ppf(1 - alpha, dof)
641 lower = -np.inf
642 upper = mean + tcrit * std_mean
643 else:
644 raise ValueError('invalid alternative')
646 return lower, upper
649def _zstat_generic(value1, value2, std_diff, alternative, diff=0):
650 '''generic (normal) z-test to save typing
652 can be used as ztest based on summary statistics
654 '''
655 zstat = (value1 - value2 - diff) / std_diff
656 if alternative in ['two-sided', '2-sided', '2s']:
657 pvalue = stats.norm.sf(np.abs(zstat))*2
658 elif alternative in ['larger', 'l']:
659 pvalue = stats.norm.sf(zstat)
660 elif alternative in ['smaller', 's']:
661 pvalue = stats.norm.cdf(zstat)
662 else:
663 raise ValueError('invalid alternative')
664 return zstat, pvalue
666def _zstat_generic2(value, std_diff, alternative):
667 '''generic (normal) z-test to save typing
669 can be used as ztest based on summary statistics
670 '''
671 zstat = value / std_diff
672 if alternative in ['two-sided', '2-sided', '2s']:
673 pvalue = stats.norm.sf(np.abs(zstat))*2
674 elif alternative in ['larger', 'l']:
675 pvalue = stats.norm.sf(zstat)
676 elif alternative in ['smaller', 's']:
677 pvalue = stats.norm.cdf(zstat)
678 else:
679 raise ValueError('invalid alternative')
680 return zstat, pvalue
682def _zconfint_generic(mean, std_mean, alpha, alternative):
683 '''generic normal-confint to save typing'''
685 if alternative in ['two-sided', '2-sided', '2s']:
686 zcrit = stats.norm.ppf(1 - alpha / 2.)
687 lower = mean - zcrit * std_mean
688 upper = mean + zcrit * std_mean
689 elif alternative in ['larger', 'l']:
690 zcrit = stats.norm.ppf(alpha)
691 lower = mean + zcrit * std_mean
692 upper = np.inf
693 elif alternative in ['smaller', 's']:
694 zcrit = stats.norm.ppf(1 - alpha)
695 lower = -np.inf
696 upper = mean + zcrit * std_mean
697 else:
698 raise ValueError('invalid alternative')
700 return lower, upper
703class CompareMeans(object):
704 '''class for two sample comparison
706 The tests and the confidence interval work for multi-endpoint comparison:
707 If d1 and d2 have the same number of rows, then each column of the data
708 in d1 is compared with the corresponding column in d2.
710 Parameters
711 ----------
712 d1, d2 : instances of DescrStatsW
714 Notes
715 -----
716 The result for the statistical tests and the confidence interval are
717 independent of the user specified ddof.
719 TODO: Extend to any number of groups or write a version that works in that
720 case, like in SAS and SPSS.
722 '''
724 def __init__(self, d1, d2):
725 '''assume d1, d2 hold the relevant attributes
727 '''
728 self.d1 = d1
729 self.d2 = d2
730 #assume nobs is available
731# if not hasattr(self.d1, 'nobs'):
732# d1.nobs1 = d1.sum_weights.astype(float) #float just to make sure
733# self.nobs2 = d2.sum_weights.astype(float)
735 @classmethod
736 def from_data(cls, data1, data2, weights1=None, weights2=None,
737 ddof1=0, ddof2=0):
738 '''construct a CompareMeans object from data
740 Parameters
741 ----------
742 data1, data2 : array_like, 1-D or 2-D
743 compared datasets
744 weights1, weights2 : None or 1-D ndarray
745 weights for each observation of data1 and data2 respectively,
746 with same length as zero axis of corresponding dataset.
747 ddof1, ddof2 : int
748 default ddof1=0, ddof2=0, degrees of freedom for data1,
749 data2 respectively.
751 Returns
752 -------
753 A CompareMeans instance.
755 '''
756 return cls(DescrStatsW(data1, weights=weights1, ddof=ddof1),
757 DescrStatsW(data2, weights=weights2, ddof=ddof2))
759 def summary(self, use_t=True, alpha=0.05, usevar='pooled', value=0):
760 '''summarize the results of the hypothesis test
762 Parameters
763 ----------
764 use_t : bool, optional
765 if use_t is True, then t test results are returned
766 if use_t is False, then z test results are returned
767 alpha : float
768 significance level for the confidence interval, coverage is
769 ``1-alpha``
770 usevar : str, 'pooled' or 'unequal'
771 If ``pooled``, then the standard deviation of the samples is
772 assumed to be the same. If ``unequal``, then the variance of
773 Welsh ttest will be used, and the degrees of freedom are those
774 of Satterthwaite if ``use_t`` is True.
775 value : float
776 difference between the means under the Null hypothesis.
778 Returns
779 -------
780 smry : SimpleTable
782 '''
784 d1 = self.d1
785 d2 = self.d2
787 confint_percents = 100 - alpha * 100
789 if use_t:
790 tstat, pvalue, _ = self.ttest_ind(usevar=usevar, value=value)
791 lower, upper = self.tconfint_diff(alpha=alpha, usevar=usevar)
792 else:
793 tstat, pvalue = self.ztest_ind(usevar=usevar, value=value)
794 lower, upper = self.zconfint_diff(alpha=alpha, usevar=usevar)
796 if usevar == 'pooled':
797 std_err = self.std_meandiff_pooledvar
798 else:
799 std_err = self.std_meandiff_separatevar
801 std_err = np.atleast_1d(std_err)
802 tstat = np.atleast_1d(tstat)
803 pvalue = np.atleast_1d(pvalue)
804 lower = np.atleast_1d(lower)
805 upper = np.atleast_1d(upper)
806 conf_int = np.column_stack((lower, upper))
807 params = np.atleast_1d(d1.mean - d2.mean - value)
809 title = 'Test for equality of means'
810 yname = 'y' # not used in params_frame
811 xname = ['subset #%d' % (ii + 1) for ii in range(tstat.shape[0])]
813 from statsmodels.iolib.summary import summary_params
814 return summary_params((None, params, std_err, tstat, pvalue, conf_int),
815 alpha=alpha, use_t=use_t, yname=yname, xname=xname,
816 title=title)
818 @cache_readonly
819 def std_meandiff_separatevar(self):
820 #this uses ``_var`` to use ddof=0 for formula
821 d1 = self.d1
822 d2 = self.d2
823 return np.sqrt(d1._var / (d1.nobs-1) + d2._var / (d2.nobs-1))
825 @cache_readonly
826 def std_meandiff_pooledvar(self):
827 '''variance assuming equal variance in both data sets
829 '''
830 #this uses ``_var`` to use ddof=0 for formula
832 d1 = self.d1
833 d2 = self.d2
834 #could make var_pooled into attribute
835 var_pooled = ((d1.sumsquares + d2.sumsquares) /
836 #(d1.nobs - d1.ddof + d2.nobs - d2.ddof))
837 (d1.nobs - 1 + d2.nobs - 1))
838 return np.sqrt(var_pooled * (1. / d1.nobs + 1. /d2.nobs))
840 def dof_satt(self):
841 '''degrees of freedom of Satterthwaite for unequal variance
842 '''
843 d1 = self.d1
844 d2 = self.d2
845 #this follows blindly the SPSS manual
846 #except I use ``_var`` which has ddof=0
847 sem1 = d1._var / (d1.nobs-1)
848 sem2 = d2._var / (d2.nobs-1)
849 semsum = sem1 + sem2
850 z1 = (sem1 / semsum)**2 / (d1.nobs - 1)
851 z2 = (sem2 / semsum)**2 / (d2.nobs - 1)
852 dof = 1. / (z1 + z2)
853 return dof
855 def ttest_ind(self, alternative='two-sided', usevar='pooled', value=0):
856 '''ttest for the null hypothesis of identical means
858 this should also be the same as onewaygls, except for ddof differences
860 Parameters
861 ----------
862 x1 : array_like, 1-D or 2-D
863 first of the two independent samples, see notes for 2-D case
864 x2 : array_like, 1-D or 2-D
865 second of the two independent samples, see notes for 2-D case
866 alternative : str
867 The alternative hypothesis, H1, has to be one of the following
868 'two-sided': H1: difference in means not equal to value (default)
869 'larger' : H1: difference in means larger than value
870 'smaller' : H1: difference in means smaller than value
872 usevar : str, 'pooled' or 'unequal'
873 If ``pooled``, then the standard deviation of the samples is assumed to be
874 the same. If ``unequal``, then Welsh ttest with Satterthwait degrees
875 of freedom is used
876 value : float
877 difference between the means under the Null hypothesis.
880 Returns
881 -------
882 tstat : float
883 test statistic
884 pvalue : float
885 pvalue of the t-test
886 df : int or float
887 degrees of freedom used in the t-test
889 Notes
890 -----
891 The result is independent of the user specified ddof.
893 '''
894 d1 = self.d1
895 d2 = self.d2
897 if usevar == 'pooled':
898 stdm = self.std_meandiff_pooledvar
899 dof = (d1.nobs - 1 + d2.nobs - 1)
900 elif usevar == 'unequal':
901 stdm = self.std_meandiff_separatevar
902 dof = self.dof_satt()
903 else:
904 raise ValueError('usevar can only be "pooled" or "unequal"')
906 tstat, pval = _tstat_generic(d1.mean, d2.mean, stdm, dof, alternative,
907 diff=value)
909 return tstat, pval, dof
911 def ztest_ind(self, alternative='two-sided', usevar='pooled', value=0):
912 '''z-test for the null hypothesis of identical means
914 Parameters
915 ----------
916 x1 : array_like, 1-D or 2-D
917 first of the two independent samples, see notes for 2-D case
918 x2 : array_like, 1-D or 2-D
919 second of the two independent samples, see notes for 2-D case
920 alternative : str
921 The alternative hypothesis, H1, has to be one of the following
922 'two-sided': H1: difference in means not equal to value (default)
923 'larger' : H1: difference in means larger than value
924 'smaller' : H1: difference in means smaller than value
926 usevar : str, 'pooled' or 'unequal'
927 If ``pooled``, then the standard deviation of the samples is assumed to be
928 the same. If ``unequal``, then the standard deviations of the samples may
929 be different.
930 value : float
931 difference between the means under the Null hypothesis.
933 Returns
934 -------
935 tstat : float
936 test statistic
937 pvalue : float
938 pvalue of the z-test
940 '''
941 d1 = self.d1
942 d2 = self.d2
944 if usevar == 'pooled':
945 stdm = self.std_meandiff_pooledvar
946 elif usevar == 'unequal':
947 stdm = self.std_meandiff_separatevar
948 else:
949 raise ValueError('usevar can only be "pooled" or "unequal"')
951 tstat, pval = _zstat_generic(d1.mean, d2.mean, stdm, alternative,
952 diff=value)
954 return tstat, pval
956 def tconfint_diff(self, alpha=0.05, alternative='two-sided',
957 usevar='pooled'):
958 '''confidence interval for the difference in means
960 Parameters
961 ----------
962 alpha : float
963 significance level for the confidence interval, coverage is
964 ``1-alpha``
965 alternative : str
966 This specifies the alternative hypothesis for the test that
967 corresponds to the confidence interval.
968 The alternative hypothesis, H1, has to be one of the following :
970 'two-sided': H1: difference in means not equal to value (default)
971 'larger' : H1: difference in means larger than value
972 'smaller' : H1: difference in means smaller than value
974 usevar : str, 'pooled' or 'unequal'
975 If ``pooled``, then the standard deviation of the samples is assumed to be
976 the same. If ``unequal``, then Welsh ttest with Satterthwait degrees
977 of freedom is used
979 Returns
980 -------
981 lower, upper : floats
982 lower and upper limits of the confidence interval
984 Notes
985 -----
986 The result is independent of the user specified ddof.
988 '''
989 d1 = self.d1
990 d2 = self.d2
991 diff = d1.mean - d2.mean
992 if usevar == 'pooled':
993 std_diff = self.std_meandiff_pooledvar
994 dof = (d1.nobs - 1 + d2.nobs - 1)
995 elif usevar == 'unequal':
996 std_diff = self.std_meandiff_separatevar
997 dof = self.dof_satt()
998 else:
999 raise ValueError('usevar can only be "pooled" or "unequal"')
1001 res = _tconfint_generic(diff, std_diff, dof, alpha=alpha,
1002 alternative=alternative)
1003 return res
1005 def zconfint_diff(self, alpha=0.05, alternative='two-sided',
1006 usevar='pooled'):
1007 '''confidence interval for the difference in means
1009 Parameters
1010 ----------
1011 alpha : float
1012 significance level for the confidence interval, coverage is
1013 ``1-alpha``
1014 alternative : str
1015 This specifies the alternative hypothesis for the test that
1016 corresponds to the confidence interval.
1017 The alternative hypothesis, H1, has to be one of the following :
1019 'two-sided': H1: difference in means not equal to value (default)
1020 'larger' : H1: difference in means larger than value
1021 'smaller' : H1: difference in means smaller than value
1023 usevar : str, 'pooled' or 'unequal'
1024 If ``pooled``, then the standard deviation of the samples is assumed to be
1025 the same. If ``unequal``, then Welsh ttest with Satterthwait degrees
1026 of freedom is used
1028 Returns
1029 -------
1030 lower, upper : floats
1031 lower and upper limits of the confidence interval
1033 Notes
1034 -----
1035 The result is independent of the user specified ddof.
1037 '''
1038 d1 = self.d1
1039 d2 = self.d2
1040 diff = d1.mean - d2.mean
1041 if usevar == 'pooled':
1042 std_diff = self.std_meandiff_pooledvar
1043 elif usevar == 'unequal':
1044 std_diff = self.std_meandiff_separatevar
1045 else:
1046 raise ValueError('usevar can only be "pooled" or "unequal"')
1048 res = _zconfint_generic(diff, std_diff, alpha=alpha,
1049 alternative=alternative)
1050 return res
def ttost_ind(self, low, upp, usevar='pooled'):
    '''Equivalence test for two independent samples, based on t-tests.

    Runs two one-sided tests (TOST) through ``self.ttest_ind``: one with
    alternative 'larger' against ``low`` and one with alternative
    'smaller' against ``upp``.

    Parameters
    ----------
    low, upp : float
        equivalence interval low < m1 - m2 < upp
    usevar : str, 'pooled' or 'unequal'
        Forwarded to ``self.ttest_ind``. If ``pooled``, then the standard
        deviation of the samples is assumed to be the same. If
        ``unequal``, then Welch t-test with Satterthwaite degrees of
        freedom is used.

    Returns
    -------
    pvalue : float
        pvalue of the non-equivalence test, i.e. the larger of the
        p-values of the two one-sided tests
    results : tuple
        pair ``(res_low, res_upp)`` holding the unmodified return values
        of ``self.ttest_ind`` for the lower and the upper threshold test.
        The second entry of each is used as its p-value.
    '''
    res_low = self.ttest_ind(value=low, usevar=usevar, alternative='larger')
    res_upp = self.ttest_ind(value=upp, usevar=usevar,
                             alternative='smaller')
    pvalue = np.maximum(res_low[1], res_upp[1])
    # TODO: remove tuple return, use same as for function tost_ind
    # The nested pair is kept: the module-level ttost_ind unpacks it.
    return pvalue, (res_low, res_upp)
def ztost_ind(self, low, upp, usevar='pooled'):
    '''Equivalence test for two independent samples, based on z-tests.

    Runs two one-sided tests (TOST) through ``self.ztest_ind``: one with
    alternative 'larger' against ``low`` and one with alternative
    'smaller' against ``upp``.

    Parameters
    ----------
    low, upp : float
        equivalence interval low < m1 - m2 < upp
    usevar : str, 'pooled' or 'unequal'
        Forwarded unchanged to ``self.ztest_ind``; it selects whether the
        two samples are assumed to have the same standard deviation
        (``pooled``) or not (``unequal``).

    Returns
    -------
    pvalue : float
        pvalue of the non-equivalence test, i.e. the larger of the
        p-values of the two one-sided tests
    res_low : tuple
        return value of ``self.ztest_ind`` for the lower threshold test;
        its second entry is used as the p-value
    res_upp : tuple
        return value of ``self.ztest_ind`` for the upper threshold test;
        its second entry is used as the p-value
    '''
    res_low = self.ztest_ind(value=low, usevar=usevar, alternative='larger')
    res_upp = self.ztest_ind(value=upp, usevar=usevar,
                             alternative='smaller')
    pvalue = np.maximum(res_low[1], res_upp[1])
    # TODO: remove tuple return, use same as for function tost_ind
    return pvalue, res_low, res_upp
1106 #tost.__doc__ = tost_ind.__doc__
1108#does not work for 2d, does not take weights into account
1109## def test_equal_var(self):
1110## '''Levene test for independence
1111##
1112## '''
1113## d1 = self.d1
1114## d2 = self.d2
1115## #rewrite this, for now just use scipy.stats
1116## return stats.levene(d1.data, d2.data)
def ttest_ind(x1, x2, alternative='two-sided', usevar='pooled',
              weights=(None, None), value=0):
    '''t-test for the difference in means of two independent samples.

    Convenience function: it wraps each sample in a ``DescrStatsW``,
    combines them in a ``CompareMeans`` and returns only the result of
    ``CompareMeans.ttest_ind``; the intermediate objects are discarded.
    Compared to scipy.stats it drops the axis option and adds the
    alternative, usevar and weights options.

    Parameters
    ----------
    x1 : array_like, 1-D or 2-D
        first of the two independent samples
    x2 : array_like, 1-D or 2-D
        second of the two independent samples
    alternative : str
        The alternative hypothesis, H1, has to be one of the following

        * 'two-sided' (default): H1: difference in means not equal to value
        * 'larger' :   H1: difference in means larger than value
        * 'smaller' :  H1: difference in means smaller than value

    usevar : str, 'pooled' or 'unequal'
        If ``pooled``, then the standard deviation of the samples is
        assumed to be the same. If ``unequal``, then Welch t-test with
        Satterthwaite degrees of freedom is used
    weights : tuple of None or ndarrays
        Case weights for the two samples, ``weights[0]`` for ``x1`` and
        ``weights[1]`` for ``x2``. For details on weights see
        ``DescrStatsW``
    value : float
        difference between the means under the Null hypothesis.

    Returns
    -------
    tstat : float
        test statistic
    pvalue : float
        pvalue of the t-test
    df : int or float
        degrees of freedom used in the t-test
    '''
    # both samples are summarized with ddof=0
    sample1 = DescrStatsW(x1, weights=weights[0], ddof=0)
    sample2 = DescrStatsW(x2, weights=weights[1], ddof=0)
    comparison = CompareMeans(sample1, sample2)

    tstat, pval, dof = comparison.ttest_ind(value=value, usevar=usevar,
                                            alternative=alternative)
    return tstat, pval, dof
def ttost_ind(x1, x2, low, upp, usevar='pooled', weights=(None, None),
              transform=None):
    '''Test of (non-)equivalence for two independent samples.

    TOST: two one-sided t tests

    null hypothesis:  m1 - m2 < low or m1 - m2 > upp
    alternative hypothesis:  low < m1 - m2 < upp

    where m1, m2 are the means, expected values of the two samples.

    If the pvalue is smaller than a threshold, say 0.05, then we reject
    the hypothesis that the difference between the two samples is larger
    than the thresholds given by low and upp.

    Parameters
    ----------
    x1 : array_like, 1-D or 2-D
        first of the two independent samples, see notes for 2-D case
    x2 : array_like, 1-D or 2-D
        second of the two independent samples, see notes for 2-D case
    low, upp : float
        equivalence interval low < m1 - m2 < upp
    usevar : str, 'pooled' or 'unequal'
        If ``pooled``, then the standard deviation of the samples is
        assumed to be the same. If ``unequal``, then Welch t-test with
        Satterthwaite degrees of freedom is used
    weights : tuple of None or ndarrays
        Case weights for the two samples. For details on weights see
        ``DescrStatsW``
    transform : None or function
        If None (default), then the data is not transformed. Given a
        function, sample data and thresholds are transformed. If
        transform is log, then the equivalence interval is in ratio:
        low < m1 / m2 < upp

    Returns
    -------
    pvalue : float
        pvalue of the non-equivalence test
    res_low : tuple
        result of the lower threshold test as returned by
        ``CompareMeans.ttest_ind``
    res_upp : tuple
        result of the upper threshold test as returned by
        ``CompareMeans.ttest_ind``

    Notes
    -----
    The test rejects if the 2*alpha confidence interval for the
    difference is contained in the ``(low, upp)`` interval.

    This test works also for multi-endpoint comparisons: If d1 and d2
    have the same number of columns, then each column of the data in d1
    is compared with the corresponding column in d2. This is the same as
    comparing each of the corresponding columns separately. Currently no
    multi-comparison correction is used. The raw p-values reported here
    can be corrected with the functions in ``multitest``.
    '''
    if transform:
        if transform is np.log:
            # special case: each sample can be transformed on its own,
            # so no stacking is needed
            x1, x2 = transform(x1), transform(x2)
        else:
            # transforms like rankdata need to see both datasets at once;
            # concatenate along axis 0 works for 1d and 2d arrays
            nobs1 = len(x1)
            stacked = transform(np.concatenate((x1, x2), 0))
            x1, x2 = stacked[:nobs1], stacked[nobs1:]
        # the thresholds have to live on the transformed scale as well
        low, upp = transform(low), transform(upp)

    sample1 = DescrStatsW(x1, weights=weights[0], ddof=0)
    sample2 = DescrStatsW(x2, weights=weights[1], ddof=0)
    pval, (res_low, res_upp) = CompareMeans(sample1, sample2).ttost_ind(
        low, upp, usevar=usevar)[:2]
    return pval, res_low, res_upp
def ttost_paired(x1, x2, low, upp, transform=None, weights=None):
    '''Test of (non-)equivalence for two dependent, paired samples.

    TOST: two one-sided t tests

    null hypothesis:  md < low or md > upp
    alternative hypothesis:  low < md < upp

    where md is the mean, expected value of the difference x1 - x2

    If the pvalue is smaller than a threshold, say 0.05, then we reject
    the hypothesis that the difference between the two samples is larger
    than the thresholds given by low and upp.

    Parameters
    ----------
    x1 : array_like
        first of the two paired samples
    x2 : array_like
        second of the two paired samples, elementwise matched with ``x1``
    low, upp : float
        equivalence interval low < mean of difference < upp
    weights : None or ndarray
        case weights applied to the paired differences. For details on
        weights see ``DescrStatsW``
    transform : None or function
        If None (default), then the data is not transformed. Given a
        function, sample data and thresholds are transformed. If
        transform is log, then the equivalence interval is in ratio:
        low < x1 / x2 < upp

    Returns
    -------
    pvalue : float
        pvalue of the non-equivalence test
    t1, pv1, df1 : tuple
        test statistic, pvalue and degrees of freedom for lower threshold
        test
    t2, pv2, df2 : tuple
        test statistic, pvalue and degrees of freedom for upper threshold
        test
    '''
    if transform:
        if transform is np.log:
            # special case: each sample can be transformed on its own
            x1 = transform(x1)
            x2 = transform(x2)
        else:
            # transforms like rankdata need to see both datasets at once;
            # concatenate along axis 0 works for 1d and 2d arrays
            nobs1 = len(x1)
            xx = transform(np.concatenate((x1, x2), 0))
            x1 = xx[:nobs1]
            x2 = xx[nobs1:]
        low = transform(low)
        upp = transform(upp)

    # Coerce before subtracting: plain lists/tuples are valid array_like
    # input but do not support elementwise ``-`` themselves.
    paired_diff = np.asarray(x1) - np.asarray(x2)
    dd = DescrStatsW(paired_diff, weights=weights, ddof=0)
    t1, pv1, df1 = dd.ttest_mean(low, alternative='larger')
    t2, pv2, df2 = dd.ttest_mean(upp, alternative='smaller')
    return np.maximum(pv1, pv2), (t1, pv1, df1), (t2, pv2, df2)
def ztest(x1, x2=None, value=0, alternative='two-sided', usevar='pooled',
          ddof=1.):
    '''Test for mean based on normal distribution, one or two samples.

    In the case of two samples, the samples are assumed to be independent.

    Parameters
    ----------
    x1 : array_like, 1-D or 2-D
        first of the two independent samples
    x2 : array_like, 1-D or 2-D
        second of the two independent samples; if None (default) a
        one-sample test is performed
    value : float
        In the one sample case, value is the mean of x1 under the Null
        hypothesis.
        In the two sample case, value is the difference between mean of
        x1 and mean of x2 under the Null hypothesis. The test statistic
        is `x1_mean - x2_mean - value`.
    alternative : str
        The alternative hypothesis, H1, has to be one of the following

           'two-sided': H1: difference in means not equal to value
                        (default)
           'larger' :   H1: difference in means larger than value
           'smaller' :  H1: difference in means smaller than value

    usevar : str, 'pooled'
        Currently, only 'pooled' is implemented.
        If ``pooled``, then the standard deviation of the samples is
        assumed to be the same. see CompareMeans.ztest_ind for different
        options.
    ddof : int
        Degrees of freedom use in the calculation of the variance of the
        mean estimate. In the case of comparing means this is one,
        however it can be adjusted for testing other statistics
        (proportion, correlation)

    Returns
    -------
    tstat : float
        test statistic
    pvalue : float
        pvalue of the z-test

    Raises
    ------
    NotImplementedError
        if ``usevar`` is anything other than 'pooled'

    Notes
    -----
    usevar not implemented, is always pooled in two sample case
    use CompareMeans instead.
    '''
    # TODO: this should delegate to CompareMeans like ttest_ind
    # However that does not implement ddof
    if usevar != 'pooled':
        raise NotImplementedError('only usevar="pooled" is implemented')

    sample1 = np.asarray(x1)
    n1 = sample1.shape[0]
    mean1 = sample1.mean(0)
    var1 = sample1.var(0)  # numpy default, i.e. divides by n1

    if x2 is None:
        # one sample: variance of the mean estimate
        mean2 = 0
        var_of_diff = var1 / (n1 - ddof)
    else:
        sample2 = np.asarray(x2)
        n2 = sample2.shape[0]
        mean2 = sample2.mean(0)
        var2 = sample2.var(0)
        # pooled variance, then scaled to the variance of the mean
        # difference; in-place steps keep the arithmetic order unchanged
        var_of_diff = n1 * var1 + n2 * var2
        var_of_diff /= n1 + n2 - 2 * ddof
        var_of_diff *= 1. / n1 + 1. / n2

    return _zstat_generic(mean1, mean2, np.sqrt(var_of_diff), alternative,
                          diff=value)
def zconfint(x1, x2=None, value=0, alpha=0.05, alternative='two-sided',
             usevar='pooled', ddof=1.):
    '''Confidence interval based on normal distribution z-test.

    Parameters
    ----------
    x1 : array_like, 1-D or 2-D
        first of the two independent samples
    x2 : array_like, 1-D or 2-D
        second of the two independent samples; if None (default) the
        one-sample interval is computed
    value : float
        In the one sample case, value is the mean of x1 under the Null
        hypothesis.
        In the two sample case, value is the difference between mean of
        x1 and mean of x2 under the Null hypothesis. The test statistic
        is `x1_mean - x2_mean - value`.
    alpha : float
        passed on to ``_zconfint_generic`` together with ``alternative``
    alternative : str
        passed on to ``_zconfint_generic``, default is 'two-sided'
    usevar : str, 'pooled'
        Currently, only 'pooled' is implemented.
        If ``pooled``, then the standard deviation of the samples is
        assumed to be the same. see CompareMeans.ztest_ind for different
        options.
    ddof : int
        Degrees of freedom use in the calculation of the variance of the
        mean estimate. In the case of comparing means this is one,
        however it can be adjusted for testing other statistics
        (proportion, correlation)

    Returns
    -------
    ci
        the return value of ``_zconfint_generic`` for the (shifted) mean
        difference and its standard deviation

    Raises
    ------
    NotImplementedError
        if ``usevar`` is anything other than 'pooled'

    Notes
    -----
    checked only for 1 sample case

    usevar not implemented, is always pooled in two sample case

    ``value`` shifts the confidence interval so it is centered at
    `x1_mean - x2_mean - value`

    See Also
    --------
    ztest
    CompareMeans
    '''
    # mostly duplicate code from ztest
    if usevar != 'pooled':
        raise NotImplementedError('only usevar="pooled" is implemented')

    sample1 = np.asarray(x1)
    n1 = sample1.shape[0]
    mean1 = sample1.mean(0)
    var1 = sample1.var(0)  # numpy default, i.e. divides by n1

    if x2 is None:
        # one sample: variance of the mean estimate
        mean2 = 0
        var_of_diff = var1 / (n1 - ddof)
    else:
        sample2 = np.asarray(x2)
        n2 = sample2.shape[0]
        mean2 = sample2.mean(0)
        var2 = sample2.var(0)
        # pooled variance, then scaled to the variance of the mean
        # difference; in-place steps keep the arithmetic order unchanged
        var_of_diff = n1 * var1 + n2 * var2
        var_of_diff /= n1 + n2 - 2 * ddof
        var_of_diff *= 1. / n1 + 1. / n2

    center = mean1 - mean2 - value
    return _zconfint_generic(center, np.sqrt(var_of_diff), alpha,
                             alternative)
def ztost(x1, low, upp, x2=None, usevar='pooled', ddof=1.):
    '''Equivalence test based on normal distribution.

    Runs two one-sided ``ztest`` calls (TOST): alternative 'larger'
    against ``low`` and alternative 'smaller' against ``upp``.

    Parameters
    ----------
    x1 : array_like
        one sample or first sample for 2 independent samples
    low, upp : float
        equivalence interval low < m1 - m2 < upp
    x2 : array_like or None
        second sample for 2 independent samples test. If None, then a
        one-sample test is performed.
    usevar : str, 'pooled'
        If `pooled`, then the standard deviation of the samples is
        assumed to be the same. Only `pooled` is currently implemented.
    ddof : int
        passed on unchanged to ``ztest``

    Returns
    -------
    pvalue : float
        pvalue of the non-equivalence test
    t1, pv1 : tuple of floats
        test statistic and pvalue for lower threshold test
    t2, pv2 : tuple of floats
        test statistic and pvalue for upper threshold test

    Notes
    -----
    checked only for 1 sample case
    '''
    res_low = ztest(x1, x2, value=low, alternative='larger', usevar=usevar,
                    ddof=ddof)
    res_upp = ztest(x1, x2, value=upp, alternative='smaller',
                    usevar=usevar, ddof=ddof)
    # the overall p-value is the larger of the two one-sided p-values
    pvalue = np.maximum(res_low[1], res_upp[1])
    return pvalue, res_low, res_upp