Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/tsa/_bds.py : 13%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2BDS test for IID time series
4References
5----------
7Broock, W. A., J. A. Scheinkman, W. D. Dechert, and B. LeBaron. 1996.
8"A Test for Independence Based on the Correlation Dimension."
9Econometric Reviews 15 (3): 197-235.
11Kanzler, Ludwig. 1999.
12"Very Fast and Correctly Sized Estimation of the BDS Statistic".
13SSRN Scholarly Paper ID 151669. Rochester, NY: Social Science Research Network.
15LeBaron, Blake. 1997.
16"A Fast Algorithm for the BDS Statistic."
17Studies in Nonlinear Dynamics & Econometrics 2 (2) (January 1).
18"""
20import numpy as np
21from scipy import stats
23from statsmodels.tools.validation import array_like
def distance_indicators(x, epsilon=None, distance=1.5):
    """
    Compute all pairwise threshold-distance (heaviside) indicators.

    Parameters
    ----------
    x : 1d array
        Observations of the time series for which the heaviside distance
        indicators are calculated.
    epsilon : scalar, optional
        The threshold distance used when evaluating the heaviside
        indicators.
    distance : scalar, optional
        When epsilon is omitted, the multiplier applied to the sample
        standard deviation of x to obtain the threshold.

    Returns
    -------
    indicators : 2d array
        Boolean matrix whose (i, j) entry is True exactly when
        |x[i] - x[j]| is strictly below the threshold.

    Notes
    -----
    The result is an (nobs, nobs) matrix, so this can be large for long
    series.
    """
    x = array_like(x, 'x')

    if epsilon is not None and epsilon <= 0:
        raise ValueError("Threshold distance must be positive if specified."
                         " Got epsilon of %f" % epsilon)
    if distance <= 0:
        raise ValueError("Threshold distance must be positive."
                         " Got distance multiplier %f" % distance)

    # TODO: add functionality to select epsilon optimally
    # TODO: and/or compute for a range of epsilons in [0.5*s, 2.0*s]?
    #       or [1.5*s, 2.0*s]?
    if epsilon is None:
        # Default threshold: `distance` sample standard deviations of x.
        epsilon = distance * x.std(ddof=1)

    pairwise_diffs = x[:, None] - x
    return np.abs(pairwise_diffs) < epsilon
def correlation_sum(indicators, embedding_dim):
    """
    Compute the correlation sum for a given embedding dimension.

    The correlation sum is useful as an estimator of a correlation
    integral.

    Parameters
    ----------
    indicators : 2d array
        Square matrix of distance threshold indicators.
    embedding_dim : int
        Embedding dimension.

    Returns
    -------
    corrsum : float
        The correlation sum: the mean of the strictly-upper-triangular
        joint indicators.
    indicators_joint : 2d array
        Matrix of joint-distance-threshold indicators.
    """
    if indicators.ndim != 2:
        raise ValueError('Indicators must be a matrix')
    if indicators.shape[0] != indicators.shape[1]:
        raise ValueError('Indicator matrix must be symmetric (square)')

    if embedding_dim == 1:
        indicators_joint = indicators
    else:
        # Recurse down to dimension 1, then combine: consecutive pairs of
        # observations must jointly be within the threshold.
        _, prev_joint = correlation_sum(indicators, embedding_dim - 1)
        indicators_joint = prev_joint[1:, 1:] * prev_joint[:-1, :-1]

    n = len(indicators_joint)
    corrsum = np.mean(indicators_joint[np.triu_indices(n, 1)])
    return corrsum, indicators_joint
def correlation_sums(indicators, max_dim):
    """
    Compute correlation sums for embedding dimensions 1 through max_dim.

    Parameters
    ----------
    indicators : 2d array
        Matrix of distance threshold indicators.
    max_dim : int
        Maximum embedding dimension.

    Returns
    -------
    corrsums : 1d array
        Correlation sums, one per embedding dimension, with shape
        (1, max_dim).
    """
    # Dimension 1 is computed directly; each further dimension reuses the
    # joint-indicator matrix returned by the previous call.
    corrsum, joint = correlation_sum(indicators, 1)
    sums = [corrsum]
    for _ in range(1, max_dim):
        corrsum, joint = correlation_sum(joint, 2)
        sums.append(corrsum)

    return np.asarray(sums, dtype=float).reshape(1, max_dim)
def _var(indicators, max_dim):
    """
    Compute the variance of the BDS effect for dimensions 2..max_dim.

    Parameters
    ----------
    indicators : 2d array
        Matrix of distance threshold indicators.
    max_dim : int
        Maximum embedding dimension.

    Returns
    -------
    variances : 2d array
        Variance of the BDS effect for each embedding dimension in
        [2, max_dim]; shape (1, max_dim - 1).
    k : float
        Intermediate statistic entering the variance formula, computed
        from the indicator matrix.
    """
    nobs = len(indicators)
    corrsum_1dim, _ = correlation_sum(indicators, 1)
    row_sums = indicators.sum(1)
    k = (((row_sums ** 2).sum() - 3 * indicators.sum() + 2 * nobs)
         / (nobs * (nobs - 1) * (nobs - 2)))

    variances = np.zeros((1, max_dim - 1))
    for m in range(2, max_dim + 1):
        # Cross terms k^(m-j) * c^(2j), accumulated in ascending j order.
        cross = sum(k ** (m - j) * corrsum_1dim ** (2 * j)
                    for j in range(1, m))
        variances[0, m - 2] = 4 * (
            k ** m
            + 2 * cross
            + ((m - 1) ** 2) * corrsum_1dim ** (2 * m)
            - (m ** 2) * k * corrsum_1dim ** (2 * m - 2))

    return variances, k
def bds(x, max_dim=2, epsilon=None, distance=1.5):
    """
    BDS Test Statistic for Independence of a Time Series

    Parameters
    ----------
    x : ndarray
        Observations of the time series for which the BDS statistics are
        calculated.
    max_dim : int
        The maximum embedding dimension.
    epsilon : {float, None}, optional
        The threshold distance to use in calculating the correlation sum.
    distance : float, optional
        The distance multiplier used to compute the threshold when
        epsilon is omitted.

    Returns
    -------
    bds_stat : float
        The BDS statistic.
    pvalue : float
        The p-values associated with the BDS statistic.

    Notes
    -----
    The null hypothesis of the test statistic is for an independent and
    identically distributed (i.i.d.) time series, and an unspecified
    alternative hypothesis.

    This test is often used as a residual diagnostic.

    The calculation involves matrices of size (nobs, nobs), so this test
    will not work with very long datasets.

    Implementation conditions on the first m-1 initial values, which are
    required to calculate the m-histories:
    x_t^m = (x_t, x_{t-1}, ... x_{t-(m-1)})
    """
    x = array_like(x, 'x', ndim=1)
    nobs_full = len(x)

    if max_dim < 2 or max_dim >= nobs_full:
        raise ValueError("Maximum embedding dimension must be in the range"
                         " [2,len(x)-1]. Got %d." % max_dim)

    # Compute the indicator matrix once and reuse it for every dimension.
    indicators = distance_indicators(x, epsilon, distance)

    # Estimates of the m-dimensional correlation integrals.
    corrsum_mdims = correlation_sums(indicators, max_dim)

    # Variance of the effect for each dimension.
    variances, _ = _var(indicators, max_dim)
    stddevs = np.sqrt(variances)

    bds_stats = np.zeros((1, max_dim - 1))
    pvalues = np.zeros((1, max_dim - 1))
    for dim in range(2, max_dim + 1):
        ninitial = dim - 1
        nobs = nobs_full - ninitial

        # 1-dimensional correlation integral on the truncated sample
        # (see Kanzler footnote 10 for why the indicators are truncated).
        corrsum_1dim, _ = correlation_sum(indicators[ninitial:, ninitial:], 1)
        corrsum_mdim = corrsum_mdims[0, dim - 1]

        # Intermediate values for the statistic.
        effect = corrsum_mdim - corrsum_1dim ** dim
        sd = stddevs[0, dim - 2]

        # Under the null, bds_stat ~ N(0, 1).
        stat = np.sqrt(nobs) * effect / sd
        bds_stats[0, dim - 2] = stat

        # Two-tailed p-value from the standard normal survival function.
        pvalues[0, dim - 2] = 2 * stats.norm.sf(np.abs(stat))

    return np.squeeze(bds_stats), np.squeeze(pvalues)