Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/distributions/edgeworth.py : 25%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
2import warnings
4import numpy as np
5from numpy.polynomial.hermite_e import HermiteE
6from scipy.special import factorial
7from scipy.stats import rv_continuous
8import scipy.special as special
10# TODO:
11# * actually solve (31) of Blinnikov & Moessner
12# * numerical stability: multiply factorials in logspace?
13# * ppf & friends: Cornish & Fisher series, or tabulate/solve
# Partitions for the lowest orders, tabulated by hand.  Partitions for larger
# `n` are generated on demand by `_faa_di_bruno_partitions` and memoized here.
_faa_di_bruno_cache = {
    1: [[(1, 1)]],
    2: [[(1, 2)], [(2, 1)]],
    3: [[(1, 3)], [(2, 1), (1, 1)], [(3, 1)]],
    4: [[(1, 4)], [(1, 2), (2, 1)], [(2, 2)], [(3, 1), (1, 1)], [(4, 1)]]}


def _iter_partitions(total, largest):
    """Yield every partition of `total` into parts not exceeding `largest`.

    Each partition is a list of `(part, multiplicity)` pairs with strictly
    decreasing `part` and non-zero `multiplicity`.
    """
    if total == 0:
        # Nothing left to split: the empty partition closes the recursion.
        yield []
        return
    for part in range(min(total, largest), 0, -1):
        for count in range(total // part, 0, -1):
            head = [(part, count)]
            # The remainder may only use strictly smaller parts, so every
            # partition is produced exactly once.
            for tail in _iter_partitions(total - part * count, part - 1):
                yield head + tail


def _faa_di_bruno_partitions(n):
    """Return all non-negative integer solutions of the diophantine equation

        n*k_n + ... + 2*k_2 + 1*k_1 = n   (1)

    Parameters
    ----------
    n : int
        the r.h.s. of Eq. (1); must be a positive integer.

    Returns
    -------
    partitions : list
        A list of solutions of (1). Each solution is itself a list of the
        form `[(m, k_m), ...]` for non-zero `k_m`.
        Notice that the index `m` is 1-based.  The order of the pairs inside
        a solution is not significant.  The returned list is shared with the
        module-level cache and must not be modified by the caller.

    Raises
    ------
    ValueError
        If `n` is smaller than 1 or is not an integer.

    Examples
    --------
    >>> _faa_di_bruno_partitions(2)
    [[(1, 2)], [(2, 1)]]
    >>> for p in _faa_di_bruno_partitions(4):
    ...     assert 4 == sum(m * k for (m, k) in p)
    >>> len(_faa_di_bruno_partitions(5))
    7
    """
    if n < 1:
        raise ValueError("Expected a positive integer; got %s instead" % n)
    try:
        return _faa_di_bruno_cache[n]
    except KeyError:
        pass

    if n != int(n):
        raise ValueError("Expected a positive integer; got %s instead" % n)
    n = int(n)

    partitions = list(_iter_partitions(n, n))
    # The generator starts with [(n, 1)]; reverse so that, like the tabulated
    # entries, the list starts with [(1, n)] and ends with [(n, 1)].
    partitions.reverse()
    _faa_di_bruno_cache[n] = partitions
    return partitions
def cumulant_from_moments(momt, n):
    """Compute the n-th cumulant from the first n moments.

    Parameters
    ----------
    momt : array_like
        Sequence of moments; `momt[j]` holds the `(j+1)`-th moment.
        Either raw moments about zero or central moments may be given
        (for central moments `momt[0]` is zero).
    n : int
        Order of the requested cumulant; must be at least 1.

    Returns
    -------
    kappa : float
        The n-th cumulant.

    Raises
    ------
    ValueError
        If `n` is smaller than 1 or fewer than `n` moments are supplied.
    """
    if n < 1:
        raise ValueError("Expected a positive integer. Got %s instead." % n)
    n_given = len(momt)
    if n_given < n:
        raise ValueError("%s-th cumulant requires %s moments, "
                         "only got %s." % (n, n, n_given))

    total = 0.
    for partition in _faa_di_bruno_partitions(n):
        # Number of blocks in this partition fixes the sign and the
        # (r - 1)! weight of its contribution.
        n_blocks = sum(mult for (_, mult) in partition)
        contrib = (-1)**(n_blocks - 1) * factorial(n_blocks - 1)
        for (order, mult) in partition:
            scaled = momt[order - 1] / factorial(order)
            contrib *= np.power(scaled, mult) / factorial(mult)
        total += contrib
    total *= factorial(n)
    return total
## Local copies of the scipy.stats.distributions helpers, kept here so the
## overhead of the public methods is avoided.
_norm_pdf_C = np.sqrt(2*np.pi)


def _norm_pdf(x):
    """Evaluate the standard normal density at `x`."""
    unnormalized = np.exp(-x**2/2.0)
    return unnormalized / _norm_pdf_C
def _norm_cdf(x):
    """Evaluate the standard normal cumulative distribution function at `x`."""
    cdf = special.ndtr(x)
    return cdf
def _norm_sf(x):
    """Evaluate the standard normal survival function at `x`.

    Computed as the cdf at the mirrored point `-x`.
    """
    mirrored = -x
    return special.ndtr(mirrored)
class ExpandedNormal(rv_continuous):
    """Construct the Edgeworth expansion pdf given cumulants.

    Parameters
    ----------
    cum : array_like
        `cum[j]` contains `(j+1)`-th cumulant: cum[0] is the mean,
        cum[1] is the variance and so on.  At least two cumulants are
        required.  The input is copied and never modified.
    name : str, optional
        Name of the distribution, forwarded to `rv_continuous`.
    **kwds
        Further keyword arguments forwarded to `rv_continuous`.  `momtype`
        is always forced to 0 so that moments are computed from the pdf.

    Raises
    ------
    ValueError
        If fewer than two cumulants are given.

    Warns
    -----
    RuntimeWarning
        If the expanded density has a zero within four standard deviations
        of the mean.

    Notes
    -----
    This is actually an asymptotic rather than convergent series, hence
    higher orders of the expansion may or may not improve the result.
    In a strongly non-Gaussian case, it is possible that the density
    becomes negative, especially far out in the tails.

    Examples
    --------
    Construct the 4th order expansion for the chi-square distribution using
    the known values of the cumulants:

    >>> import matplotlib.pyplot as plt
    >>> from scipy import stats
    >>> from scipy.special import factorial
    >>> df = 12
    >>> chi2_c = [2**(j-1) * factorial(j-1) * df for j in range(1, 5)]
    >>> edgw_chi2 = ExpandedNormal(chi2_c, name='edgw_chi2', momtype=0)

    Calculate several moments:

    >>> m, v = edgw_chi2.stats(moments='mv')
    >>> np.allclose([m, v], [df, 2 * df])
    True

    Plot the density function:

    >>> mu, sigma = df, np.sqrt(2*df)
    >>> x = np.linspace(mu - 3*sigma, mu + 3*sigma)
    >>> fig1 = plt.plot(x, stats.chi2.pdf(x, df=df), 'g-', lw=4, alpha=0.5)
    >>> fig2 = plt.plot(x, stats.norm.pdf(x, mu, sigma), 'b--', lw=4, alpha=0.5)
    >>> fig3 = plt.plot(x, edgw_chi2.pdf(x), 'r-', lw=2)
    >>> plt.show()

    References
    ----------
    .. [*] E.A. Cornish and R.A. Fisher, Moments and cumulants in the
         specification of distributions, Revue de l'Institut Internat.
         de Statistique. 5: 307 (1938), reprinted in
         R.A. Fisher, Contributions to Mathematical Statistics. Wiley, 1950.
    .. [*] https://en.wikipedia.org/wiki/Edgeworth_series
    .. [*] S. Blinnikov and R. Moessner, Expansions for nearly Gaussian
        distributions, Astron. Astrophys. Suppl. Ser. 130, 193 (1998)
    """
    def __init__(self, cum, name='Edgeworth expanded normal', **kwds):
        if len(cum) < 2:
            raise ValueError("At least two cumulants are needed.")
        self._coef, self._mu, self._sigma = self._compute_coefs_pdf(cum)
        self._herm_pdf = HermiteE(self._coef)
        if self._coef.size > 2:
            self._herm_cdf = HermiteE(-self._coef[1:])
        else:
            # Plain normal: the correction polynomial is identically zero.
            # A polynomial object (rather than a lambda) keeps the instance
            # picklable.
            self._herm_cdf = HermiteE([0.])

        # warn if pdf(x) < 0 for some values of x within 4 sigma.
        # The Hermite series is evaluated at y = (x - mu) / sigma, so its
        # roots are already expressed in units of sigma around the mean and
        # must not be standardized a second time.
        roots = np.real_if_close(self._herm_pdf.roots())
        near = (np.imag(roots) == 0) & (np.abs(roots) < 4)
        if near.any():
            # Report the offending zeros on the original x scale.
            zeros = self._mu + self._sigma * np.real(roots[near])
            mesg = 'PDF has zeros at %s ' % zeros
            warnings.warn(mesg, RuntimeWarning)

        kwds.update({'name': name,
                     'momtype': 0})   # use pdf, not ppf in self.moment()
        super(ExpandedNormal, self).__init__(**kwds)

    def _pdf(self, x):
        """Density: Hermite series times the normal pdf, rescaled by sigma."""
        y = (x - self._mu) / self._sigma
        return self._herm_pdf(y) * _norm_pdf(y) / self._sigma

    def _cdf(self, x):
        """Normal cdf plus the Hermite correction weighted by the normal pdf."""
        y = (x - self._mu) / self._sigma
        return (_norm_cdf(y) +
                self._herm_cdf(y) * _norm_pdf(y))

    def _sf(self, x):
        """Survival function; the correction enters with the opposite sign."""
        y = (x - self._mu) / self._sigma
        return (_norm_sf(y) -
                self._herm_cdf(y) * _norm_pdf(y))

    def _compute_coefs_pdf(self, cum):
        """Return `(coef, mu, sigma)` for the Hermite series of the pdf.

        `coef` are the coefficients of the probabilists' Hermite series that
        multiplies the standard normal density; `mu` and `sigma` are the mean
        and standard deviation taken from the first two cumulants.
        """
        # Work on a private float copy: the caller's data is left untouched
        # and integer input is not truncated by the scaling below.
        lam = np.array(cum, dtype=float)
        mu, variance = lam[0], lam[1]
        sigma = np.sqrt(variance)

        # scale cumulants by \sigma: the entry at index j is divided by
        # variance**j.
        lam /= variance ** np.arange(lam.size)

        coef = np.zeros(lam.size * 3 - 5)
        coef[0] = 1.
        for s in range(lam.size - 2):
            for p in _faa_di_bruno_partitions(s+1):
                term = sigma**(s+1)
                for (m, k) in p:
                    term *= np.power(lam[m+1] / factorial(m+2), k) / factorial(k)
                r = sum(k for (m, k) in p)
                coef[s + 1 + 2*r] += term
        return coef, mu, sigma