# statsmodels/stats/_adnorm.py
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 25 21:23:38 2011

Author: Josef Perktold and Scipy developers
License : BSD-3
"""
import numpy as np
from scipy import stats

from statsmodels.tools.validation import array_like, bool_like, int_like


def anderson_statistic(x, dist='norm', fit=True, params=(), axis=0):
    """
    Calculate the Anderson-Darling a2 statistic.

    Parameters
    ----------
    x : array_like
        The data to test.
    dist : {'norm', callable}
        The assumed distribution under the null hypothesis.
    fit : bool
        If True, then the distribution parameters are estimated.
        Currently only supported for 1d data x, except in the case
        dist='norm'.
    params : tuple
        The optional distribution parameters if fit is False.
    axis : int
        If dist is 'norm' or fit is False, then the data can be
        n-dimensional and axis specifies the axis along which the
        statistic is computed.

    Returns
    -------
    {float, ndarray}
        The Anderson-Darling statistic.
    """
    x = array_like(x, 'x', ndim=None)
    fit = bool_like(fit, 'fit')
    axis = int_like(axis, 'axis')
    y = np.sort(x, axis=axis)
    nobs = y.shape[axis]
    if fit:
        if dist == 'norm':
            xbar = np.expand_dims(np.mean(x, axis=axis), axis)
            s = np.expand_dims(np.std(x, ddof=1, axis=axis), axis)
            w = (y - xbar) / s
            z = stats.norm.cdf(w)
        elif callable(dist):
            params = dist.fit(x)
            z = dist.cdf(y, *params)
        else:
            raise ValueError("dist must be 'norm' or a callable")
    else:
        if callable(dist):
            z = dist.cdf(y, *params)
        else:
            raise ValueError("if fit is False, then dist must be callable")
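
    # The lines below implement the Anderson-Darling statistic
    #     A^2 = -n - sum_{i=1}^{n} (2i - 1)/n * [ln z_(i) + ln(1 - z_(n+1-i))]
    # where z_(i) are the sorted CDF-transformed observations; sl1/sl2 build
    # the slices needed to apply this along an arbitrary axis.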
    i = np.arange(1, nobs + 1)
    sl1 = [None] * x.ndim
    sl1[axis] = slice(None)
    sl1 = tuple(sl1)
    sl2 = [slice(None)] * x.ndim
    sl2[axis] = slice(None, None, -1)
    sl2 = tuple(sl2)
    s = np.sum((2 * i[sl1] - 1.0) / nobs * (np.log(z) + np.log1p(-z[sl2])),
               axis=axis)
    a2 = -nobs - s
    return a2


def normal_ad(x, axis=0):
    """
    Anderson-Darling test for a normal distribution with unknown mean and
    variance.

    Parameters
    ----------
    x : array_like
        The data array.
    axis : int
        The axis to perform the test along.

    Returns
    -------
    ad2 : {float, ndarray}
        The Anderson-Darling test statistic.
    pval : {float, ndarray}
        The p-value for the hypothesis that the data come from a normal
        distribution with unknown mean and variance.

    See Also
    --------
    statsmodels.stats.diagnostic.anderson_statistic
        The Anderson-Darling a2 statistic.
    statsmodels.stats.diagnostic.kstest_fit
        Kolmogorov-Smirnov test with estimated parameters for Normal or
        Exponential distributions.
    """
    ad2 = anderson_statistic(x, dist='norm', fit=True, axis=axis)
    n = x.shape[axis]

    ad2a = ad2 * (1 + 0.75 / n + 2.25 / n ** 2)
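
    # ad2a applies a finite-sample correction to A^2; the piecewise
    # exponential formulas below approximate its p-value over four ranges of
    # the corrected statistic (coefficients as given in the original source,
    # commonly attributed to D'Agostino and Stephens, 1986).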
    if np.size(ad2a) == 1:
        if (ad2a >= 0.00 and ad2a < 0.200):
            pval = 1 - np.exp(-13.436 + 101.14 * ad2a - 223.73 * ad2a ** 2)
        elif ad2a < 0.340:
            pval = 1 - np.exp(-8.318 + 42.796 * ad2a - 59.938 * ad2a ** 2)
        elif ad2a < 0.600:
            pval = np.exp(0.9177 - 4.279 * ad2a - 1.38 * ad2a ** 2)
        elif ad2a <= 13:
            pval = np.exp(1.2937 - 5.709 * ad2a + 0.0186 * ad2a ** 2)
        else:
            pval = 0.0  # p-value is < 4.9542108058458799e-31

    else:
        bounds = np.array([0.0, 0.200, 0.340, 0.600])
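
        # np.searchsorted assigns each element of ad2a to one of five
        # segments delimited by `bounds`; the matching formula from `pvalli`
        # is then applied elementwise via the boolean masks below.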
        pval0 = lambda ad2a: np.nan * np.ones_like(ad2a)
        pval1 = lambda ad2a: 1 - np.exp(
            -13.436 + 101.14 * ad2a - 223.73 * ad2a ** 2)
        pval2 = lambda ad2a: 1 - np.exp(
            -8.318 + 42.796 * ad2a - 59.938 * ad2a ** 2)
        pval3 = lambda ad2a: np.exp(0.9177 - 4.279 * ad2a - 1.38 * ad2a ** 2)
        pval4 = lambda ad2a: np.exp(1.2937 - 5.709 * ad2a + 0.0186 * ad2a ** 2)

        pvalli = [pval0, pval1, pval2, pval3, pval4]

        idx = np.searchsorted(bounds, ad2a, side='right')
        pval = np.nan * np.ones_like(ad2a)
        for i in range(5):
            mask = (idx == i)
            pval[mask] = pvalli[i](ad2a[mask])

    return ad2, pval


if __name__ == '__main__':
    x = np.array([-0.1184, -1.3403, 0.0063, -0.612, -0.3869, -0.2313,
                  -2.8485, -0.2167, 0.4153, 1.8492, -0.3706, 0.9726,
                  -0.1501, -0.0337, -1.4423, 1.2489, 0.9182, -0.2331,
                  -0.6182, 0.1830])
    r_res = np.array([0.58672353588821502, 0.1115380760041617])
    ad2, pval = normal_ad(x)
    print(ad2, pval)
    print(r_res - [ad2, pval])

    print(anderson_statistic((x - x.mean()) / x.std(), dist=stats.norm,
                             fit=False))
    print(anderson_statistic(x, dist=stats.norm, fit=True))
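
    # Additional sketch (not in the original script): exercise the vectorized
    # p-value branch of normal_ad by testing each column of a 2-d sample;
    # the variable names below are illustrative only.
    xm = np.random.RandomState(1234).standard_normal((50, 3))
    ad2_cols, pval_cols = normal_ad(xm, axis=0)
    print(ad2_cols, pval_cols)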