Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2BDS test for IID time series 

3 

4References 

5---------- 

6 

7Broock, W. A., J. A. Scheinkman, W. D. Dechert, and B. LeBaron. 1996. 

8"A Test for Independence Based on the Correlation Dimension." 

9Econometric Reviews 15 (3): 197-235. 

10 

11Kanzler, Ludwig. 1999. 

12"Very Fast and Correctly Sized Estimation of the BDS Statistic". 

13SSRN Scholarly Paper ID 151669. Rochester, NY: Social Science Research Network. 

14 

15LeBaron, Blake. 1997. 

16"A Fast Algorithm for the BDS Statistic." 

17Studies in Nonlinear Dynamics & Econometrics 2 (2) (January 1). 

18""" 

19 

20import numpy as np 

21from scipy import stats 

22 

23from statsmodels.tools.validation import array_like 

24 

25 

def distance_indicators(x, epsilon=None, distance=1.5):
    """
    Build the matrix of pairwise "closer than epsilon" indicators

    Parameters
    ----------
    x : 1d array
        Observations of the time series. The input is passed through
        ``array_like`` before use.
    epsilon : scalar, optional
        Threshold distance. Two observations are flagged when the absolute
        difference between them is strictly less than this value.
    distance : scalar, optional
        Multiplier applied to the sample standard deviation (``ddof=1``) of
        `x` to obtain the threshold when `epsilon` is omitted. It is
        validated even when `epsilon` is supplied.

    Returns
    -------
    indicators : 2d array
        Boolean matrix whose ``[i, j]`` entry is True when
        ``abs(x[i] - x[j]) < epsilon``. For a 1d series of length nobs the
        shape is (nobs, nobs).

    Raises
    ------
    ValueError
        If `epsilon` is given and is not positive, or if `distance` is not
        positive.

    Notes
    -----
    The full pairwise matrix is materialised, so memory use grows with the
    square of the series length. The entries are the booleans produced by
    the comparison rather than wider integers.
    """
    series = array_like(x, 'x')

    if epsilon is not None:
        if epsilon <= 0:
            raise ValueError("Threshold distance must be positive if"
                             " specified. Got epsilon of %f" % epsilon)
    if distance <= 0:
        raise ValueError("Threshold distance must be positive."
                         " Got distance multiplier %f" % distance)

    # TODO: add functionality to select epsilon optimally
    # TODO: and/or compute for a range of epsilons in [0.5*s, 2.0*s]?
    #       or [1.5*s, 2.0*s]?
    if epsilon is None:
        threshold = distance * series.std(ddof=1)
    else:
        threshold = epsilon

    # Broadcasting a column against the row gives every pairwise difference.
    gaps = np.abs(series[:, None] - series)
    return gaps < threshold

66 

67 

def correlation_sum(indicators, embedding_dim):
    """
    Calculate a correlation sum

    Useful as an estimator of a correlation integral

    Parameters
    ----------
    indicators : 2d array
        Square matrix of distance threshold indicators.
    embedding_dim : int
        Embedding dimension. Must be at least 1.

    Returns
    -------
    corrsum : float
        Correlation sum: the mean of the joint indicators lying strictly
        above the main diagonal.
    indicators_joint : 2d array
        Matrix of joint-distance-threshold indicators. Each embedding
        dimension beyond the first removes one row and one column, so the
        result has ``len(indicators) - (embedding_dim - 1)`` rows.

    Raises
    ------
    ValueError
        If `indicators` is not a square 2d array, or if `embedding_dim` is
        smaller than 1.
    """
    if indicators.ndim != 2:
        raise ValueError('Indicators must be a matrix')
    if indicators.shape[0] != indicators.shape[1]:
        raise ValueError('Indicator matrix must be symmetric (square)')
    # Without this guard a dimension below 1 never reaches the base case.
    if embedding_dim < 1:
        raise ValueError('Embedding dimension must be at least 1.'
                         ' Got %d.' % embedding_dim)

    # Entry (i, j) of the joint matrix for dimension m is the product of
    # entries (i, j) and (i + 1, j + 1) of the matrix for dimension m - 1,
    # so the matrix is built up one dimension at a time.
    indicators_joint = indicators
    for _ in range(embedding_dim - 1):
        indicators_joint = (indicators_joint[1:, 1:] *
                            indicators_joint[:-1, :-1])

    # Only the final dimension's sum is needed; average each unordered
    # pair once by reading the strict upper triangle.
    nobs = len(indicators_joint)
    corrsum = np.mean(indicators_joint[np.triu_indices(nobs, 1)])
    return corrsum, indicators_joint

102 

103 

def correlation_sums(indicators, max_dim):
    """
    Calculate the correlation sums for embedding dimensions 1 to max_dim

    Parameters
    ----------
    indicators : 2d array
        matrix of distance threshold indicators
    max_dim : int
        maximum embedding dimension

    Returns
    -------
    corrsums : 2d array
        Array of shape (1, max_dim); column ``m - 1`` holds the correlation
        sum for embedding dimension ``m``.
    """
    sums = np.zeros((1, max_dim))

    # Dimension 1 uses the indicator matrix exactly as supplied.
    value, joint = correlation_sum(indicators, 1)
    sums[0, 0] = value

    # Each further dimension is one step from the previous joint matrix,
    # so feeding it back in with dimension 2 avoids starting from scratch.
    for column in range(1, max_dim):
        value, joint = correlation_sum(joint, 2)
        sums[0, column] = value

    return sums

128 

129 

def _var(indicators, max_dim):
    """
    Calculate the variance of a BDS effect for each embedding dimension

    Parameters
    ----------
    indicators : 2d array
        matrix of distance threshold indicators
    max_dim : int
        maximum embedding dimension

    Returns
    -------
    variances : 2d array
        Array of shape (1, max_dim - 1); column ``m - 2`` holds the variance
        of the BDS effect for embedding dimension ``m``.
    k : float
        The quantity ``(sum_i r_i**2 - 3*S + 2*nobs) /
        (nobs*(nobs - 1)*(nobs - 2))``, where ``r_i`` are the row sums of
        `indicators` and ``S`` is the sum of all of its entries.
    """
    nobs = len(indicators)
    c1 = correlation_sum(indicators, 1)[0]

    row_totals = indicators.sum(1)
    numerator = (row_totals**2).sum() - 3*indicators.sum() + 2*nobs
    k = numerator / (nobs * (nobs - 1) * (nobs - 2))

    variances = np.zeros((1, max_dim - 1))

    for column, m in enumerate(range(2, max_dim + 1)):
        # Cross terms k**(m - j) * c1**(2*j) for j = 1, ..., m - 1.
        cross = 0
        for j in range(1, m):
            cross += (k**(m - j))*(c1**(2 * j))
        inner = (k**m +
                 2 * cross +
                 ((m - 1)**2) * (c1**(2 * m)) -
                 (m**2) * k * (c1**(2 * m - 2)))
        variances[0, column] = 4 * inner

    return variances, k

164 

165 

def bds(x, max_dim=2, epsilon=None, distance=1.5):
    """
    BDS Test Statistic for Independence of a Time Series

    Parameters
    ----------
    x : ndarray
        Observations of time series for which bds statistics is calculated.
    max_dim : int
        The maximum embedding dimension. Must satisfy
        ``2 <= max_dim <= len(x) - 1``.
    epsilon : {float, None}, optional
        The threshold distance to use in calculating the correlation sum.
    distance : float, optional
        Specifies the distance multiplier to use when computing the test
        statistic if epsilon is omitted.

    Returns
    -------
    bds_stat : ndarray
        The BDS statistics, one per embedding dimension from 2 to max_dim.
        The array is squeezed, so it is 0-dimensional when max_dim is 2.
    pvalue : ndarray
        The two-sided p-values associated with the BDS statistics, squeezed
        in the same way.

    Raises
    ------
    ValueError
        If max_dim is smaller than 2 or not smaller than ``len(x)``.

    Notes
    -----
    The null hypothesis of the test statistic is for an independent and
    identically distributed (i.i.d.) time series, and an unspecified
    alternative hypothesis.

    This test is often used as a residual diagnostic.

    The calculation involves matrices of size (nobs, nobs), so this test
    will not work with very long datasets.

    Implementation conditions on the first m-1 initial values, which are
    required to calculate the m-histories:
    x_t^m = (x_t, x_{t-1}, ... x_{t-(m-1)})
    """
    x = array_like(x, 'x', ndim=1)
    nobs_full = len(x)

    if max_dim < 2 or max_dim >= nobs_full:
        raise ValueError("Maximum embedding dimension must be in the range"
                         " [2,len(x)-1]. Got %d." % max_dim)

    # The indicator matrix is computed once and reused by every step below.
    indicators = distance_indicators(x, epsilon, distance)

    # Estimates of the m-dimensional correlation integrals, m = 1..max_dim
    corrsum_mdims = correlation_sums(indicators, max_dim)

    # Standard deviation of the effect for m = 2..max_dim
    variances, _ = _var(indicators, max_dim)
    stddevs = np.sqrt(variances)

    bds_stats = np.zeros((1, max_dim - 1))
    for column, embedding_dim in enumerate(range(2, max_dim + 1)):
        ninitial = embedding_dim - 1
        nobs = nobs_full - ninitial

        # 1-dimensional correlation integral on the truncated indicators
        # (see Kanzler footnote 10 for why indicators are truncated)
        trimmed = indicators[ninitial:, ninitial:]
        corrsum_1dim = correlation_sum(trimmed, 1)[0]
        corrsum_mdim = corrsum_mdims[0, ninitial]

        effect = corrsum_mdim - (corrsum_1dim**embedding_dim)
        sd = stddevs[0, column]

        # bds_stat ~ N(0,1) under the null
        bds_stats[0, column] = np.sqrt(nobs) * effect / sd

    # Two-tailed p-values for all dimensions at once
    pvalues = 2*stats.norm.sf(np.abs(bds_stats))

    return np.squeeze(bds_stats), np.squeeze(pvalues)