Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2""" 

3Miscellaneous utility code for VAR estimation 

4""" 

5from statsmodels.compat.python import asbytes 

6from statsmodels.compat.pandas import frequencies 

7import numpy as np 

8import scipy.stats as stats 

9import scipy.linalg.decomp as decomp 

10import pandas as pd 

11 

12import statsmodels.tsa.tsatools as tsa 

13 

14 

15#------------------------------------------------------------------------------- 

16# Auxiliary functions for estimation 

17 

18def get_var_endog(y, lags, trend='c', has_constant='skip'): 

19 """ 

20 Make predictor matrix for VAR(p) process 

21 

22 Z := (Z_0, ..., Z_T).T (T x Kp) 

23 Z_t = [1 y_t y_{t-1} ... y_{t - p + 1}] (Kp x 1) 

24 

25 Ref: Lütkepohl p.70 (transposed) 

26 

27 has_constant can be 'raise', 'add', or 'skip'. See add_constant. 

28 """ 

29 nobs = len(y) 

30 # Ravel C order, need to put in descending order 

31 Z = np.array([y[t-lags : t][::-1].ravel() for t in range(lags, nobs)]) 

32 

33 # Add constant, trend, etc. 

34 if trend != 'nc': 

35 Z = tsa.add_trend(Z, prepend=True, trend=trend, 

36 has_constant=has_constant) 

37 

38 return Z 

39 

40 

41def get_trendorder(trend='c'): 

42 # Handle constant, etc. 

43 if trend == 'c': 

44 trendorder = 1 

45 elif trend in ('n', 'nc'): 

46 trendorder = 0 

47 elif trend == 'ct': 

48 trendorder = 2 

49 elif trend == 'ctt': 

50 trendorder = 3 

51 return trendorder 

52 

53 

54def make_lag_names(names, lag_order, trendorder=1, exog=None): 

55 """ 

56 Produce list of lag-variable names. Constant / trends go at the beginning 

57 

58 Examples 

59 -------- 

60 >>> make_lag_names(['foo', 'bar'], 2, 1) 

61 ['const', 'L1.foo', 'L1.bar', 'L2.foo', 'L2.bar'] 

62 """ 

63 lag_names = [] 

64 if isinstance(names, str): 

65 names = [names] 

66 

67 # take care of lagged endogenous names 

68 for i in range(1, lag_order + 1): 

69 for name in names: 

70 if not isinstance(name, str): 

71 name = str(name) # will need consistent unicode handling 

72 lag_names.append('L'+str(i)+'.'+name) 

73 

74 # handle the constant name 

75 if trendorder != 0: 

76 lag_names.insert(0, 'const') 

77 if trendorder > 1: 

78 lag_names.insert(1, 'trend') 

79 if trendorder > 2: 

80 lag_names.insert(2, 'trend**2') 

81 if exog is not None: 

82 if isinstance(exog, pd.Series): 

83 exog = pd.DataFrame(exog) 

84 elif not hasattr(exog, 'ndim'): 

85 exog = np.asarray(exog) 

86 if exog.ndim == 1: 

87 exog = exog[:, None] 

88 for i in range(exog.shape[1]): 

89 if isinstance(exog, pd.DataFrame): 

90 exog_name = str(exog.columns[i]) 

91 else: 

92 exog_name = "exog" + str(i) 

93 lag_names.insert(trendorder + i, exog_name) 

94 return lag_names 

95 

96 

97def comp_matrix(coefs): 

98 """ 

99 Return compansion matrix for the VAR(1) representation for a VAR(p) process 

100 (companion form) 

101 

102 A = [A_1 A_2 ... A_p-1 A_p 

103 I_K 0 0 0 

104 0 I_K ... 0 0 

105 0 ... I_K 0] 

106 """ 

107 p, k1, k2 = coefs.shape 

108 if k1 != k2: 

109 raise ValueError('coefs must be 3-d with shape (p, k, k).') 

110 

111 kp = k1 * p 

112 

113 result = np.zeros((kp, kp)) 

114 result[:k1] = np.concatenate(coefs, axis=1) 

115 

116 # Set I_K matrices 

117 if p > 1: 

118 result[np.arange(k1, kp), np.arange(kp-k1)] = 1 

119 

120 return result 

121 

122#------------------------------------------------------------------------------- 

123# Miscellaneous stuff 

124 

125 

126def parse_lutkepohl_data(path): # pragma: no cover 

127 """ 

128 Parse data files from Lütkepohl (2005) book 

129 

130 Source for data files: www.jmulti.de 

131 """ 

132 

133 from collections import deque 

134 from datetime import datetime 

135 import re 

136 

137 regex = re.compile(asbytes(r'<(.*) (\w)([\d]+)>.*')) 

138 with open(path, 'rb') as f: 

139 lines = deque(f) 

140 

141 to_skip = 0 

142 while asbytes('*/') not in lines.popleft(): 

143 #while '*/' not in lines.popleft(): 

144 to_skip += 1 

145 

146 while True: 

147 to_skip += 1 

148 line = lines.popleft() 

149 m = regex.match(line) 

150 if m: 

151 year, freq, start_point = m.groups() 

152 break 

153 

154 data = (pd.read_csv(path, delimiter=r"\s+", header=to_skip+1) 

155 .to_records(index=False)) 

156 

157 n = len(data) 

158 

159 # generate the corresponding date range (using pandas for now) 

160 start_point = int(start_point) 

161 year = int(year) 

162 

163 offsets = { 

164 asbytes('Q'): frequencies.BQuarterEnd(), 

165 asbytes('M'): frequencies.BMonthEnd(), 

166 asbytes('A'): frequencies.BYearEnd() 

167 } 

168 

169 # create an instance 

170 offset = offsets[freq] 

171 

172 inc = offset * (start_point - 1) 

173 start_date = offset.rollforward(datetime(year, 1, 1)) + inc 

174 

175 offset = offsets[freq] 

176 date_range = pd.date_range(start=start_date, freq=offset, periods=n) 

177 

178 return data, date_range 

179 

180 

181def norm_signif_level(alpha=0.05): 

182 return stats.norm.ppf(1 - alpha / 2) 

183 

184 

185def acf_to_acorr(acf): 

186 diag = np.diag(acf[0]) 

187 # numpy broadcasting sufficient 

188 return acf / np.sqrt(np.outer(diag, diag)) 

189 

190 

191def varsim(coefs, intercept, sig_u, steps=100, initvalues=None, seed=None): 

192 """ 

193 Simulate VAR(p) process, given coefficients and assuming Gaussian noise 

194 

195 Parameters 

196 ---------- 

197 coefs : ndarray 

198 Coefficients for the VAR lags of endog. 

199 intercept : None or ndarray 1-D (neqs,) or (steps, neqs) 

200 This can be either the intercept for each equation or an offset. 

201 If None, then the VAR process has a zero intercept. 

202 If intercept is 1-D, then the same (endog specific) intercept is added 

203 to all observations. 

204 If intercept is 2-D, then it is treated as an offset and is added as 

205 an observation specific intercept to the autoregression. In this case, 

206 the intercept/offset should have same number of rows as steps, and the 

207 same number of columns as endogenous variables (neqs). 

208 sig_u : ndarray 

209 Covariance matrix of the residuals or innovations. 

210 If sig_u is None, then an identity matrix is used. 

211 steps : {None, int} 

212 number of observations to simulate, this includes the initial 

213 observations to start the autoregressive process. 

214 If offset is not None, then exog of the model are used if they were 

215 provided in the model 

216 seed : {None, int} 

217 If seed is not None, then it will be used with for the random 

218 variables generated by numpy.random. 

219 

220 Returns 

221 ------- 

222 endog_simulated : nd_array 

223 Endog of the simulated VAR process 

224 """ 

225 rs = np.random.RandomState(seed=seed) 

226 rmvnorm = rs.multivariate_normal 

227 p, k, k = coefs.shape 

228 if sig_u is None: 

229 sig_u = np.eye(k) 

230 ugen = rmvnorm(np.zeros(len(sig_u)), sig_u, steps) 

231 result = np.zeros((steps, k)) 

232 if intercept is not None: 

233 # intercept can be 2-D like an offset variable 

234 if np.ndim(intercept) > 1: 

235 if not len(intercept) == len(ugen): 

236 raise ValueError('2-D intercept needs to have length `steps`') 

237 # add intercept/offset also to intial values 

238 result += intercept 

239 result[p:] += ugen[p:] 

240 else: 

241 result[p:] = ugen[p:] 

242 

243 # add in AR terms 

244 for t in range(p, steps): 

245 ygen = result[t] 

246 for j in range(p): 

247 ygen += np.dot(coefs[j], result[t-j-1]) 

248 

249 return result 

250 

251 

252def get_index(lst, name): 

253 try: 

254 result = lst.index(name) 

255 except Exception: 

256 if not isinstance(name, int): 

257 raise 

258 result = name 

259 return result 

260 

261 

262#method used repeatedly in Sims-Zha error bands 

263def eigval_decomp(sym_array): 

264 """ 

265 Returns 

266 ------- 

267 W: array of eigenvectors 

268 eigva: list of eigenvalues 

269 k: largest eigenvector 

270 """ 

271 #check if symmetric, do not include shock period 

272 eigva, W = decomp.eig(sym_array, left=True, right=False) 

273 k = np.argmax(eigva) 

274 return W, eigva, k 

275 

276 

277def vech(A): 

278 """ 

279 Simple vech operator 

280 Returns 

281 ------- 

282 vechvec: vector of all elements on and below diagonal 

283 """ 

284 

285 length=A.shape[1] 

286 vechvec=[] 

287 for i in range(length): 

288 b=i 

289 while b < length: 

290 vechvec.append(A[b,i]) 

291 b=b+1 

292 vechvec=np.asarray(vechvec) 

293 return vechvec 

294 

295 

296def seasonal_dummies(n_seasons, len_endog, first_period=0, centered=False): 

297 """ 

298 

299 Parameters 

300 ---------- 

301 n_seasons : int >= 0 

302 Number of seasons (e.g. 12 for monthly data and 4 for quarterly data). 

303 len_endog : int >= 0 

304 Total number of observations. 

305 first_period : int, default: 0 

306 Season of the first observation. As an example, suppose we have monthly 

307 data and the first observation is in March (third month of the year). 

308 In this case we pass 2 as first_period. (0 for the first season, 

309 1 for the second, ..., n_seasons-1 for the last season). 

310 An integer greater than n_seasons-1 are treated in the same way as the 

311 integer modulo n_seasons. 

312 centered : bool, default: False 

313 If True, center (demean) the dummy variables. That is useful in order 

314 to get seasonal dummies that are orthogonal to the vector of constant 

315 dummy variables (a vector of ones). 

316 

317 Returns 

318 ------- 

319 seasonal_dummies : ndarray (len_endog x n_seasons-1) 

320 """ 

321 if n_seasons == 0: 

322 return np.empty((len_endog, 0)) 

323 if n_seasons > 0: 

324 season_exog = np.zeros((len_endog, n_seasons - 1)) 

325 for i in range(n_seasons - 1): 

326 season_exog[(i-first_period) % n_seasons::n_seasons, i] = 1 

327 

328 if centered: 

329 season_exog -= 1 / n_seasons 

330 return season_exog