Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/tsa/vector_ar/util.py : 16%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3Miscellaneous utility code for VAR estimation
4"""
5from statsmodels.compat.python import asbytes
6from statsmodels.compat.pandas import frequencies
7import numpy as np
8import scipy.stats as stats
9import scipy.linalg.decomp as decomp
10import pandas as pd
12import statsmodels.tsa.tsatools as tsa
15#-------------------------------------------------------------------------------
16# Auxiliary functions for estimation
18def get_var_endog(y, lags, trend='c', has_constant='skip'):
19 """
20 Make predictor matrix for VAR(p) process
22 Z := (Z_0, ..., Z_T).T (T x Kp)
23 Z_t = [1 y_t y_{t-1} ... y_{t - p + 1}] (Kp x 1)
25 Ref: Lütkepohl p.70 (transposed)
27 has_constant can be 'raise', 'add', or 'skip'. See add_constant.
28 """
29 nobs = len(y)
30 # Ravel C order, need to put in descending order
31 Z = np.array([y[t-lags : t][::-1].ravel() for t in range(lags, nobs)])
33 # Add constant, trend, etc.
34 if trend != 'nc':
35 Z = tsa.add_trend(Z, prepend=True, trend=trend,
36 has_constant=has_constant)
38 return Z
41def get_trendorder(trend='c'):
42 # Handle constant, etc.
43 if trend == 'c':
44 trendorder = 1
45 elif trend in ('n', 'nc'):
46 trendorder = 0
47 elif trend == 'ct':
48 trendorder = 2
49 elif trend == 'ctt':
50 trendorder = 3
51 return trendorder
54def make_lag_names(names, lag_order, trendorder=1, exog=None):
55 """
56 Produce list of lag-variable names. Constant / trends go at the beginning
58 Examples
59 --------
60 >>> make_lag_names(['foo', 'bar'], 2, 1)
61 ['const', 'L1.foo', 'L1.bar', 'L2.foo', 'L2.bar']
62 """
63 lag_names = []
64 if isinstance(names, str):
65 names = [names]
67 # take care of lagged endogenous names
68 for i in range(1, lag_order + 1):
69 for name in names:
70 if not isinstance(name, str):
71 name = str(name) # will need consistent unicode handling
72 lag_names.append('L'+str(i)+'.'+name)
74 # handle the constant name
75 if trendorder != 0:
76 lag_names.insert(0, 'const')
77 if trendorder > 1:
78 lag_names.insert(1, 'trend')
79 if trendorder > 2:
80 lag_names.insert(2, 'trend**2')
81 if exog is not None:
82 if isinstance(exog, pd.Series):
83 exog = pd.DataFrame(exog)
84 elif not hasattr(exog, 'ndim'):
85 exog = np.asarray(exog)
86 if exog.ndim == 1:
87 exog = exog[:, None]
88 for i in range(exog.shape[1]):
89 if isinstance(exog, pd.DataFrame):
90 exog_name = str(exog.columns[i])
91 else:
92 exog_name = "exog" + str(i)
93 lag_names.insert(trendorder + i, exog_name)
94 return lag_names
97def comp_matrix(coefs):
98 """
99 Return compansion matrix for the VAR(1) representation for a VAR(p) process
100 (companion form)
102 A = [A_1 A_2 ... A_p-1 A_p
103 I_K 0 0 0
104 0 I_K ... 0 0
105 0 ... I_K 0]
106 """
107 p, k1, k2 = coefs.shape
108 if k1 != k2:
109 raise ValueError('coefs must be 3-d with shape (p, k, k).')
111 kp = k1 * p
113 result = np.zeros((kp, kp))
114 result[:k1] = np.concatenate(coefs, axis=1)
116 # Set I_K matrices
117 if p > 1:
118 result[np.arange(k1, kp), np.arange(kp-k1)] = 1
120 return result
122#-------------------------------------------------------------------------------
123# Miscellaneous stuff
126def parse_lutkepohl_data(path): # pragma: no cover
127 """
128 Parse data files from Lütkepohl (2005) book
130 Source for data files: www.jmulti.de
131 """
133 from collections import deque
134 from datetime import datetime
135 import re
137 regex = re.compile(asbytes(r'<(.*) (\w)([\d]+)>.*'))
138 with open(path, 'rb') as f:
139 lines = deque(f)
141 to_skip = 0
142 while asbytes('*/') not in lines.popleft():
143 #while '*/' not in lines.popleft():
144 to_skip += 1
146 while True:
147 to_skip += 1
148 line = lines.popleft()
149 m = regex.match(line)
150 if m:
151 year, freq, start_point = m.groups()
152 break
154 data = (pd.read_csv(path, delimiter=r"\s+", header=to_skip+1)
155 .to_records(index=False))
157 n = len(data)
159 # generate the corresponding date range (using pandas for now)
160 start_point = int(start_point)
161 year = int(year)
163 offsets = {
164 asbytes('Q'): frequencies.BQuarterEnd(),
165 asbytes('M'): frequencies.BMonthEnd(),
166 asbytes('A'): frequencies.BYearEnd()
167 }
169 # create an instance
170 offset = offsets[freq]
172 inc = offset * (start_point - 1)
173 start_date = offset.rollforward(datetime(year, 1, 1)) + inc
175 offset = offsets[freq]
176 date_range = pd.date_range(start=start_date, freq=offset, periods=n)
178 return data, date_range
181def norm_signif_level(alpha=0.05):
182 return stats.norm.ppf(1 - alpha / 2)
185def acf_to_acorr(acf):
186 diag = np.diag(acf[0])
187 # numpy broadcasting sufficient
188 return acf / np.sqrt(np.outer(diag, diag))
191def varsim(coefs, intercept, sig_u, steps=100, initvalues=None, seed=None):
192 """
193 Simulate VAR(p) process, given coefficients and assuming Gaussian noise
195 Parameters
196 ----------
197 coefs : ndarray
198 Coefficients for the VAR lags of endog.
199 intercept : None or ndarray 1-D (neqs,) or (steps, neqs)
200 This can be either the intercept for each equation or an offset.
201 If None, then the VAR process has a zero intercept.
202 If intercept is 1-D, then the same (endog specific) intercept is added
203 to all observations.
204 If intercept is 2-D, then it is treated as an offset and is added as
205 an observation specific intercept to the autoregression. In this case,
206 the intercept/offset should have same number of rows as steps, and the
207 same number of columns as endogenous variables (neqs).
208 sig_u : ndarray
209 Covariance matrix of the residuals or innovations.
210 If sig_u is None, then an identity matrix is used.
211 steps : {None, int}
212 number of observations to simulate, this includes the initial
213 observations to start the autoregressive process.
214 If offset is not None, then exog of the model are used if they were
215 provided in the model
216 seed : {None, int}
217 If seed is not None, then it will be used with for the random
218 variables generated by numpy.random.
220 Returns
221 -------
222 endog_simulated : nd_array
223 Endog of the simulated VAR process
224 """
225 rs = np.random.RandomState(seed=seed)
226 rmvnorm = rs.multivariate_normal
227 p, k, k = coefs.shape
228 if sig_u is None:
229 sig_u = np.eye(k)
230 ugen = rmvnorm(np.zeros(len(sig_u)), sig_u, steps)
231 result = np.zeros((steps, k))
232 if intercept is not None:
233 # intercept can be 2-D like an offset variable
234 if np.ndim(intercept) > 1:
235 if not len(intercept) == len(ugen):
236 raise ValueError('2-D intercept needs to have length `steps`')
237 # add intercept/offset also to intial values
238 result += intercept
239 result[p:] += ugen[p:]
240 else:
241 result[p:] = ugen[p:]
243 # add in AR terms
244 for t in range(p, steps):
245 ygen = result[t]
246 for j in range(p):
247 ygen += np.dot(coefs[j], result[t-j-1])
249 return result
252def get_index(lst, name):
253 try:
254 result = lst.index(name)
255 except Exception:
256 if not isinstance(name, int):
257 raise
258 result = name
259 return result
262#method used repeatedly in Sims-Zha error bands
263def eigval_decomp(sym_array):
264 """
265 Returns
266 -------
267 W: array of eigenvectors
268 eigva: list of eigenvalues
269 k: largest eigenvector
270 """
271 #check if symmetric, do not include shock period
272 eigva, W = decomp.eig(sym_array, left=True, right=False)
273 k = np.argmax(eigva)
274 return W, eigva, k
277def vech(A):
278 """
279 Simple vech operator
280 Returns
281 -------
282 vechvec: vector of all elements on and below diagonal
283 """
285 length=A.shape[1]
286 vechvec=[]
287 for i in range(length):
288 b=i
289 while b < length:
290 vechvec.append(A[b,i])
291 b=b+1
292 vechvec=np.asarray(vechvec)
293 return vechvec
296def seasonal_dummies(n_seasons, len_endog, first_period=0, centered=False):
297 """
299 Parameters
300 ----------
301 n_seasons : int >= 0
302 Number of seasons (e.g. 12 for monthly data and 4 for quarterly data).
303 len_endog : int >= 0
304 Total number of observations.
305 first_period : int, default: 0
306 Season of the first observation. As an example, suppose we have monthly
307 data and the first observation is in March (third month of the year).
308 In this case we pass 2 as first_period. (0 for the first season,
309 1 for the second, ..., n_seasons-1 for the last season).
310 An integer greater than n_seasons-1 are treated in the same way as the
311 integer modulo n_seasons.
312 centered : bool, default: False
313 If True, center (demean) the dummy variables. That is useful in order
314 to get seasonal dummies that are orthogonal to the vector of constant
315 dummy variables (a vector of ones).
317 Returns
318 -------
319 seasonal_dummies : ndarray (len_endog x n_seasons-1)
320 """
321 if n_seasons == 0:
322 return np.empty((len_endog, 0))
323 if n_seasons > 0:
324 season_exog = np.zeros((len_endog, n_seasons - 1))
325 for i in range(n_seasons - 1):
326 season_exog[(i-first_period) % n_seasons::n_seasons, i] = 1
328 if centered:
329 season_exog -= 1 / n_seasons
330 return season_exog