Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/tsa/vector_ar/vecm.py : 12%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
3from collections import defaultdict
4import numpy as np
5from numpy import hstack, vstack
6from numpy.linalg import inv, svd
7import scipy
8import scipy.stats
10from statsmodels.compat.python import iteritems
11from statsmodels.iolib.summary import Summary
12from statsmodels.iolib.table import SimpleTable
13from statsmodels.tools.decorators import cache_readonly
14from statsmodels.tools.sm_exceptions import HypothesisTestWarning
15from statsmodels.tsa.tsatools import duplication_matrix, vec, lagmat
17import statsmodels.tsa.base.tsa_model as tsbase
18import statsmodels.tsa.vector_ar.irf as irf
19import statsmodels.tsa.vector_ar.plotting as plot
20from statsmodels.tsa.vector_ar.hypothesis_test_results import \
21 CausalityTestResults, WhitenessTestResults
22from statsmodels.tsa.vector_ar.util import get_index, seasonal_dummies
23from statsmodels.tsa.vector_ar.var_model import forecast, forecast_interval, \
24 VAR, ma_rep, orth_ma_rep, test_normality, LagOrderResults, _compute_acov
25from statsmodels.tsa.coint_tables import c_sja, c_sjt
28def select_order(data, maxlags, deterministic="nc", seasons=0, exog=None,
29 exog_coint=None):
30 """
31 Compute lag order selections based on each of the available information
32 criteria.
34 Parameters
35 ----------
36 data : array_like (nobs_tot x neqs)
37 The observed data.
38 maxlags : int
39 All orders until maxlag will be compared according to the information
40 criteria listed in the Results-section of this docstring.
41 deterministic : str {``"nc"``, ``"co"``, ``"ci"``, ``"lo"``, ``"li"``}
42 * ``"nc"`` - no deterministic terms
43 * ``"co"`` - constant outside the cointegration relation
44 * ``"ci"`` - constant within the cointegration relation
45 * ``"lo"`` - linear trend outside the cointegration relation
46 * ``"li"`` - linear trend within the cointegration relation
48 Combinations of these are possible (e.g. ``"cili"`` or ``"colo"`` for
49 linear trend with intercept). See the docstring of the
50 :class:`VECM`-class for more information.
51 seasons : int, default: 0
52 Number of periods in a seasonal cycle.
53 exog : ndarray (nobs_tot x neqs) or `None`, default: `None`
54 Deterministic terms outside the cointegration relation.
55 exog_coint : ndarray (nobs_tot x neqs) or `None`, default: `None`
56 Deterministic terms inside the cointegration relation.
58 Returns
59 -------
60 selected_orders : :class:`statsmodels.tsa.vector_ar.var_model.LagOrderResults`
61 """
62 ic = defaultdict(list)
63 for p in range(1, maxlags + 2): # +2 because k_ar_VECM == k_ar_VAR - 1
64 exogs = []
65 if "co" in deterministic or "ci" in deterministic:
66 exogs.append(np.ones(len(data)).reshape(-1, 1))
67 if "lo" in deterministic or "li" in deterministic:
68 exogs.append(1 + np.arange(len(data)).reshape(-1, 1))
69 if exog_coint is not None:
70 exogs.append(exog_coint)
71 if seasons > 0:
72 exogs.append(seasonal_dummies(seasons, len(data)
73 ).reshape(-1, seasons-1))
74 if exog is not None:
75 exogs.append(exog)
76 exogs = hstack(exogs) if exogs else None
77 var_model = VAR(data, exogs)
78 # exclude some periods ==> same amount of data used for each lag order
79 var_result = var_model._estimate_var(lags=p, offset=maxlags+1-p)
81 for k, v in iteritems(var_result.info_criteria):
82 ic[k].append(v)
83 # -1+1 in the following line is only here for clarification.
84 # -1 because k_ar_VECM == k_ar_VAR - 1
85 # +1 because p == index +1 (we start with p=1, not p=0)
86 selected_orders = dict((ic_name, np.array(ic_value).argmin() - 1 + 1)
87 for ic_name, ic_value in iteritems(ic))
89 return LagOrderResults(ic, selected_orders, True)
92def _linear_trend(nobs, k_ar, coint=False):
93 """
94 Construct an ndarray representing a linear trend in a VECM.
96 Parameters
97 ----------
98 nobs : int
99 Number of observations excluding the presample.
100 k_ar : int
101 Number of lags in levels.
102 coint : bool, default: False
103 If True (False), the returned array represents a linear trend inside
104 (outside) the cointegration relation.
106 Returns
107 -------
108 ret : ndarray (nobs)
109 An ndarray representing a linear trend in a VECM
111 Notes
112 -----
113 The returned array's size is nobs and not nobs_tot so it cannot be used to
114 construct the exog-argument of VECM's __init__ method.
115 """
116 ret = np.arange(nobs) + k_ar
117 if not coint:
118 ret += 1
119 return ret
122def _num_det_vars(det_string, seasons=0):
123 """Gives the number of deterministic variables specified by det_string and
124 seasons.
126 Parameters
127 ----------
128 det_string : str {"nc", "co", "ci", "lo", "li"}
129 * "nc" - no deterministic terms
130 * "co" - constant outside the cointegration relation
131 * "ci" - constant within the cointegration relation
132 * "lo" - linear trend outside the cointegration relation
133 * "li" - linear trend within the cointegration relation
135 Combinations of these are possible (e.g. "cili" or "colo" for linear
136 trend with intercept). See the docstring of the :class:`VECM`-class for
137 more information.
138 seasons : int
139 Number of periods in a seasonal cycle.
141 Returns
142 -------
143 num : int
144 Number of deterministic terms and number dummy variables for seasonal
145 terms.
146 """
147 num = 0
148 if "ci" in det_string or "co" in det_string:
149 num += 1
150 if "li" in det_string or "lo" in det_string:
151 num += 1
152 if seasons > 0:
153 num += seasons - 1
154 return num
157def _deterministic_to_exog(deterministic, seasons, nobs_tot, first_season=0,
158 seasons_centered=False, exog=None, exog_coint=None):
159 """
160 Translate all information about deterministic terms into a single array.
162 These information is taken from `deterministic` and `seasons` as well as
163 from the `exog` and `exog_coint` arrays. The resulting array form can then
164 be used e.g. in VAR's __init__ method.
166 Parameters
167 ----------
168 deterministic : str
169 A string specifying the deterministic terms in the model. See VECM's
170 docstring for more information.
171 seasons : int
172 Number of periods in a seasonal cycle.
173 nobs_tot : int
174 Number of observations including the presample.
175 first_season : int, default: 0
176 Season of the first observation.
177 seasons_centered : bool, default: False
178 If True, the seasonal dummy variables are demeaned such that they are
179 orthogonal to an intercept term.
180 exog : ndarray (nobs_tot x #det_terms) or None, default: None
181 An ndarray representing deterministic terms outside the cointegration
182 relation.
183 exog_coint : ndarray (nobs_tot x #det_terms_coint) or None, default: None
184 An ndarray representing deterministic terms inside the cointegration
185 relation.
187 Returns
188 -------
189 exog : ndarray or None
190 None, if the function's arguments do not contain deterministic terms.
191 Otherwise, an ndarray representing these deterministic terms.
192 """
193 exogs = []
194 if "co" in deterministic or "ci" in deterministic:
195 exogs.append(np.ones(nobs_tot))
196 if exog_coint is not None:
197 exogs.append(exog_coint)
198 if "lo" in deterministic or "li" in deterministic:
199 exogs.append(np.arange(nobs_tot))
200 if seasons > 0:
201 exogs.append(seasonal_dummies(seasons, nobs_tot,
202 first_period=first_season,
203 centered=seasons_centered))
204 if exog is not None:
205 exogs.append(exog)
206 return np.column_stack(exogs) if exogs else None
209def _mat_sqrt(_2darray):
210 """Calculates the square root of a matrix.
212 Parameters
213 ----------
214 _2darray : ndarray
215 A 2-dimensional ndarray representing a square matrix.
217 Returns
218 -------
219 result : ndarray
220 Square root of the matrix given as function argument.
221 """
222 u_, s_, v_ = svd(_2darray, full_matrices=False)
223 s_ = np.sqrt(s_)
224 return u_.dot(s_[:, None] * v_)
227def _endog_matrices(endog, exog, exog_coint, diff_lags, deterministic,
228 seasons=0, first_season=0):
229 """
230 Returns different matrices needed for parameter estimation.
232 Compare p. 186 in [1]_. The returned matrices consist of elements of the
233 data as well as elements representing deterministic terms. A tuple of
234 consisting of these matrices is returned.
236 Parameters
237 ----------
238 endog : ndarray (neqs x nobs_tot)
239 The whole sample including the presample.
240 exog : ndarray (nobs_tot x neqs) or None
241 Deterministic terms outside the cointegration relation.
242 exog_coint : ndarray (nobs_tot x neqs) or None
243 Deterministic terms inside the cointegration relation.
244 diff_lags : int
245 Number of lags in the VEC representation.
246 deterministic : str {``"nc"``, ``"co"``, ``"ci"``, ``"lo"``, ``"li"``}
247 * ``"nc"`` - no deterministic terms
248 * ``"co"`` - constant outside the cointegration relation
249 * ``"ci"`` - constant within the cointegration relation
250 * ``"lo"`` - linear trend outside the cointegration relation
251 * ``"li"`` - linear trend within the cointegration relation
253 Combinations of these are possible (e.g. ``"cili"`` or ``"colo"`` for
254 linear trend with intercept). See the docstring of the
255 :class:`VECM`-class for more information.
256 seasons : int, default: 0
257 Number of periods in a seasonal cycle. 0 (default) means no seasons.
258 first_season : int, default: 0
259 The season of the first observation. `0` means first season, `1` means
260 second season, ..., `seasons-1` means the last season.
262 Returns
263 -------
264 y_1_T : ndarray (neqs x nobs)
265 The (transposed) data without the presample.
266 `.. math:: (y_1, \\ldots, y_T)
267 delta_y_1_T : ndarray (neqs x nobs)
268 The first differences of endog.
269 `.. math:: (y_1, \\ldots, y_T) - (y_0, \\ldots, y_{T-1})
270 y_lag1 : ndarray (neqs x nobs)
271 (dimensions assuming no deterministic terms are given)
272 Endog of the previous period (lag 1).
273 `.. math:: (y_0, \\ldots, y_{T-1})
274 delta_x : ndarray (k_ar_diff*neqs x nobs)
275 (dimensions assuming no deterministic terms are given)
276 Lagged differenced endog, used as regressor for the short term
277 equation.
279 References
280 ----------
281 .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series Analysis*. Springer.
282 """
283 # p. 286:
284 p = diff_lags+1
285 y = endog
286 K = y.shape[0]
287 y_1_T = y[:, p:]
288 T = y_1_T.shape[1]
289 delta_y = np.diff(y)
290 delta_y_1_T = delta_y[:, p-1:]
292 y_lag1 = y[:, p-1:-1]
293 if "co" in deterministic and "ci" in deterministic:
294 raise ValueError("Both 'co' and 'ci' as deterministic terms given. " +
295 "Please choose one of the two.")
296 y_lag1_stack = [y_lag1]
297 if "ci" in deterministic: # pp. 257, 299, 306, 307
298 y_lag1_stack.append(np.ones(T))
299 if "li" in deterministic: # p. 299
300 y_lag1_stack.append(_linear_trend(T, p, coint=True))
301 if exog_coint is not None:
302 y_lag1_stack.append(exog_coint[-T-1:-1].T)
303 y_lag1 = np.row_stack(y_lag1_stack)
305 # p. 286:
306 delta_x = np.zeros((diff_lags*K, T))
307 if diff_lags > 0:
308 for j in range(delta_x.shape[1]):
309 delta_x[:, j] = (delta_y[:, j+p-2:None if j-1 < 0 else j-1:-1]
310 .T.reshape(K*(p-1)))
311 delta_x_stack = [delta_x]
312 # p. 299, p. 303:
313 if "co" in deterministic:
314 delta_x_stack.append(np.ones(T))
315 if seasons > 0:
316 delta_x_stack.append(seasonal_dummies(seasons, delta_x.shape[1],
317 first_period=first_season + diff_lags + 1,
318 centered=True).T)
319 if "lo" in deterministic:
320 delta_x_stack.append(_linear_trend(T, p))
321 if exog is not None:
322 delta_x_stack.append(exog[-T:].T)
323 delta_x = np.row_stack(delta_x_stack)
325 return y_1_T, delta_y_1_T, y_lag1, delta_x
328def _r_matrices(delta_y_1_T, y_lag1, delta_x):
329 """Returns two ndarrays needed for parameter estimation as well as the
330 calculation of standard errors.
332 Parameters
333 ----------
334 delta_y_1_T : ndarray (neqs x nobs)
335 The first differences of endog.
336 `.. math:: (y_1, \\ldots, y_T) - (y_0, \\ldots, y_{T-1})
337 y_lag1 : ndarray (neqs x nobs)
338 (dimensions assuming no deterministic terms are given)
339 Endog of the previous period (lag 1).
340 `.. math:: (y_0, \\ldots, y_{T-1})
341 delta_x : ndarray (k_ar_diff*neqs x nobs)
342 (dimensions assuming no deterministic terms are given)
343 Lagged differenced endog, used as regressor for the short term
344 equation.
346 Returns
347 -------
348 result : tuple
349 A tuple of two ndarrays. (See p. 292 in [1]_ for the definition of
350 R_0 and R_1.)
352 References
353 ----------
354 .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series Analysis*. Springer.
355 """
357 # todo: rewrite m such that a big (TxT) matrix is avoided
358 nobs = y_lag1.shape[1]
359 m = np.identity(nobs) - (
360 delta_x.T.dot(inv(delta_x.dot(delta_x.T))).dot(delta_x)) # p. 291
361 r0 = delta_y_1_T.dot(m) # p. 292
362 r1 = y_lag1.dot(m)
363 return r0, r1
366def _sij(delta_x, delta_y_1_T, y_lag1):
367 """Returns matrices and eigenvalues and -vectors used for parameter
368 estimation and the calculation of a models loglikelihood.
370 Parameters
371 ----------
372 delta_x : ndarray (k_ar_diff*neqs x nobs)
373 (dimensions assuming no deterministic terms are given)
374 delta_y_1_T : ndarray (neqs x nobs)
375 :math:`(y_1, \\ldots, y_T) - (y_0, \\ldots, y_{T-1})`
376 y_lag1 : ndarray (neqs x nobs)
377 (dimensions assuming no deterministic terms are given)
378 :math:`(y_0, \\ldots, y_{T-1})`
380 Returns
381 -------
382 result : tuple
383 A tuple of five ndarrays as well as eigenvalues and -vectors of a
384 certain (matrix) product of some of the returned ndarrays.
385 (See pp. 294-295 in [1]_ for more information on
386 :math:`S_0, S_1, \\lambda_i, \\v_i` for
387 :math:`i \\in \\{1, \\dots, K\\}`.)
389 References
390 ----------
391 .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series Analysis*. Springer.
392 """
393 nobs = y_lag1.shape[1]
394 r0, r1 = _r_matrices(delta_y_1_T, y_lag1, delta_x)
395 s00 = np.dot(r0, r0.T) / nobs
396 s01 = np.dot(r0, r1.T) / nobs
397 s10 = s01.T
398 s11 = np.dot(r1, r1.T) / nobs
399 s11_ = inv(_mat_sqrt(s11))
400 # p. 295:
401 s01_s11_ = np.dot(s01, s11_)
402 eig = np.linalg.eig(s01_s11_.T @ inv(s00) @ s01_s11_)
403 lambd = eig[0]
404 v = eig[1]
405 # reorder eig_vals to make them decreasing (and order eig_vecs accordingly)
406 lambd_order = np.argsort(lambd)[::-1]
407 lambd = lambd[lambd_order]
408 v = v[:, lambd_order]
409 return s00, s01, s10, s11, s11_, lambd, v
412class CointRankResults:
413 """A class for holding the results from testing the cointegration rank.
415 Parameters
416 ----------
417 rank : int (0 <= `rank` <= `neqs`)
418 The rank to choose according to the Johansen cointegration rank
419 test.
420 neqs : int
421 Number of variables in the time series.
422 test_stats : array_like (`rank` + 1 if `rank` < `neqs` else `rank`)
423 A one-dimensional array-like object containing the test statistics of
424 the conducted tests.
425 crit_vals : array_like (`rank` +1 if `rank` < `neqs` else `rank`)
426 A one-dimensional array-like object containing the critical values
427 corresponding to the entries in the `test_stats` argument.
428 method : str, {``"trace"``, ``"maxeig"``}, default: ``"trace"``
429 If ``"trace"``, the trace test statistic is used. If ``"maxeig"``, the
430 maximum eigenvalue test statistic is used.
431 signif : float, {0.1, 0.05, 0.01}, default: 0.05
432 The test's significance level.
433 """
434 def __init__(self, rank, neqs, test_stats, crit_vals, method="trace",
435 signif=0.05):
436 self.rank = rank
437 self.neqs = neqs
438 self.r_1 = [neqs if method == "trace" else i+1
439 for i in range(min(rank+1, neqs))]
440 self.test_stats = test_stats
441 self.crit_vals = crit_vals
442 self.method = method
443 self.signif = signif
445 def summary(self):
446 headers = ["r_0", "r_1", "test statistic", "critical value"]
447 title = "Johansen cointegration test using " + \
448 ("trace" if self.method == "trace" else "maximum eigenvalue") + \
449 " test statistic with {:.0%}".format(self.signif) + \
450 " significance level"
451 num_tests = min(self.rank, self.neqs-1)
452 data = [[i, self.r_1[i], self.test_stats[i], self.crit_vals[i]]
453 for i in range(num_tests + 1)]
454 data_fmt = {"data_fmts": ["%s", "%s", "%#0.4g", "%#0.4g"],
455 "data_aligns": "r"}
456 html_data_fmt = dict(data_fmt)
457 html_data_fmt["data_fmts"] = ["<td>" + i + "</td>"
458 for i in html_data_fmt["data_fmts"]]
459 return SimpleTable(data=data, headers=headers, title=title,
460 txt_fmt=data_fmt, html_fmt=html_data_fmt,
461 ltx_fmt=data_fmt)
463 def __str__(self):
464 return self.summary().as_text()
467def select_coint_rank(endog, det_order, k_ar_diff, method="trace",
468 signif=0.05):
469 """Calculate the cointegration rank of a VECM.
471 Parameters
472 ----------
473 endog : array_like (nobs_tot x neqs)
474 The data with presample.
475 det_order : int
476 * -1 - no deterministic terms
477 * 0 - constant term
478 * 1 - linear trend
479 k_ar_diff : int, nonnegative
480 Number of lagged differences in the model.
481 method : str, {``"trace"``, ``"maxeig"``}, default: ``"trace"``
482 If ``"trace"``, the trace test statistic is used. If ``"maxeig"``, the
483 maximum eigenvalue test statistic is used.
484 signif : float, {0.1, 0.05, 0.01}, default: 0.05
485 The test's significance level.
487 Returns
488 -------
489 rank : :class:`CointRankResults`
490 A :class:`CointRankResults` object containing the cointegration rank suggested
491 by the test and allowing a summary to be printed.
492 """
493 if method not in ["trace", "maxeig"]:
494 raise ValueError("The method argument has to be either 'trace' or"
495 "'maximum eigenvalue'.")
497 if det_order not in [-1, 0, 1]:
498 if type(det_order) == int and det_order > 1:
499 raise ValueError("A det_order greather than 1 is not supported."
500 "Use a value of -1, 0, or 1.")
501 else:
502 raise ValueError("det_order must be -1, 0, or 1.")
504 possible_signif_values = [0.1, 0.05, 0.01]
505 if signif not in possible_signif_values:
506 raise ValueError("Please choose a significance level from {0.1, 0.05,"
507 "0.01}")
509 coint_result = coint_johansen(endog, det_order, k_ar_diff)
510 test_stat = coint_result.lr1 if method == "trace" else coint_result.lr2
511 crit_vals = coint_result.cvt if method == "trace" else coint_result.cvm
512 signif_index = possible_signif_values.index(signif)
514 neqs = endog.shape[1]
515 r_0 = 0 # rank in null hypothesis
516 while r_0 < neqs:
517 if test_stat[r_0] < crit_vals[r_0, signif_index]:
518 break # we accept current rank
519 else:
520 r_0 += 1 # we reject current rank and test next possible rank
522 return CointRankResults(r_0, neqs, test_stat[:r_0 + 1],
523 crit_vals[:r_0 + 1, signif_index], method, signif)
526def coint_johansen(endog, det_order, k_ar_diff):
527 """
528 Johansen cointegration test of the cointegration rank of a VECM
530 Parameters
531 ----------
532 endog : array_like (nobs_tot x neqs)
533 Data to test
534 det_order : int
535 * -1 - no deterministic terms
536 * 0 - constant term
537 * 1 - linear trend
538 k_ar_diff : int, nonnegative
539 Number of lagged differences in the model.
541 Returns
542 -------
543 result : JohansenTestResult
544 An object containing the test's results. The most important attributes
545 of the result class are:
547 * trace_stat and trace_stat_crit_vals
548 * max_eig_stat and max_eig_stat_crit_vals
550 Notes
551 -----
552 The implementation might change to make more use of the existing VECM
553 framework.
555 See Also
556 --------
557 statsmodels.tsa.vector_ar.vecm.select_coint_rank
559 References
560 ----------
561 .. [1] Lütkepohl, H. 2005. New Introduction to Multiple Time Series
562 Analysis. Springer.
563 """
564 import warnings
565 if det_order not in [-1, 0, 1]:
566 warnings.warn("Critical values are only available for a det_order of "
567 "-1, 0, or 1.", category=HypothesisTestWarning)
568 if endog.shape[1] > 12: # todo: test with a time series of 13 variables
569 warnings.warn("Critical values are only available for time series "
570 "with 12 variables at most.",
571 category=HypothesisTestWarning)
573 from statsmodels.regression.linear_model import OLS
575 def detrend(y, order):
576 if order == -1:
577 return y
578 return OLS(y, np.vander(np.linspace(-1, 1, len(y)),
579 order+1)).fit().resid
581 def resid(y, x):
582 if x.size == 0:
583 return y
584 r = y - np.dot(x, np.dot(np.linalg.pinv(x), y))
585 return r
587 endog = np.asarray(endog)
588 nobs, neqs = endog.shape
590 # why this? f is detrend transformed series, det_order is detrend data
591 if det_order > -1:
592 f = 0
593 else:
594 f = det_order
596 endog = detrend(endog, det_order)
597 dx = np.diff(endog, 1, axis=0)
598 z = lagmat(dx, k_ar_diff)
599 z = z[k_ar_diff:]
600 z = detrend(z, f)
602 dx = dx[k_ar_diff:]
604 dx = detrend(dx, f)
605 r0t = resid(dx, z)
606 # GH 5731, [:-0] does not work, need [:t-0]
607 lx = endog[:(endog.shape[0]-k_ar_diff)]
608 lx = lx[1:]
609 dx = detrend(lx, f)
610 rkt = resid(dx, z) # level on lagged diffs
611 # Level covariance after filtering k_ar_diff
612 skk = np.dot(rkt.T, rkt) / rkt.shape[0]
613 # Covariacne between filtered and unfiltered
614 sk0 = np.dot(rkt.T, r0t) / rkt.shape[0]
615 s00 = np.dot(r0t.T, r0t) / r0t.shape[0]
616 sig = np.dot(sk0, np.dot(inv(s00), sk0.T))
617 tmp = inv(skk)
618 au, du = np.linalg.eig(np.dot(tmp, sig)) # au is eval, du is evec
620 temp = inv(np.linalg.cholesky(np.dot(du.T, np.dot(skk, du))))
621 dt = np.dot(du, temp)
623 # JP: the next part can be done much easier
624 auind = np.argsort(au)
625 aind = np.flipud(auind)
626 a = au[aind]
627 d = dt[:, aind]
628 # Normalize by first non-zero element of d, usually [0, 0]
629 # GH 5517
630 non_zero_d = d.flat != 0
631 if np.any(non_zero_d):
632 d *= np.sign(d.flat[non_zero_d][0])
634 # Compute the trace and max eigenvalue statistics
635 lr1 = np.zeros(neqs)
636 lr2 = np.zeros(neqs)
637 cvm = np.zeros((neqs, 3))
638 cvt = np.zeros((neqs, 3))
639 iota = np.ones(neqs)
640 t, junk = rkt.shape
641 for i in range(0, neqs):
642 tmp = np.log(iota - a)[i:]
643 lr1[i] = -t * np.sum(tmp, 0)
644 lr2[i] = -t * np.log(1-a[i])
645 cvm[i, :] = c_sja(neqs - i, det_order)
646 cvt[i, :] = c_sjt(neqs - i, det_order)
647 aind[i] = i
649 return JohansenTestResult(rkt, r0t, a, d, lr1, lr2, cvt, cvm, aind)
652class JohansenTestResult(object):
653 """
654 Results class for Johansen's cointegration test
656 Notes
657 -----
658 See p. 292 in [1]_ for r0t and rkt
660 References
661 ----------
662 .. [1] Lütkepohl, H. 2005. New Introduction to Multiple Time Series
663 Analysis. Springer.
664 """
665 def __init__(self, rkt, r0t, eig, evec, lr1, lr2, cvt, cvm, ind):
666 self._meth = 'johansen'
667 self._rkt = rkt
668 self._r0t = r0t
669 self._eig = eig
670 self._evec = evec
671 self._lr1 = lr1
672 self._lr2 = lr2
673 self._cvt = cvt
674 self._cvm = cvm
675 self._ind = ind
677 @property
678 def rkt(self):
679 """Residuals for :math:`Y_{-1}`"""
680 return self._rkt
682 @property
683 def r0t(self):
684 """Residuals for :math:`\\Delta Y`."""
685 return self._r0t
687 @property
688 def eig(self):
689 """Eigenvalues of VECM coefficient matrix"""
690 return self._eig
692 @property
693 def evec(self):
694 """Eigenvectors of VECM coefficient matrix"""
695 return self._evec
697 @property
698 def trace_stat(self):
699 """Trace statistic"""
700 return self._lr1
702 @property
703 def lr1(self):
704 """Trace statistic"""
705 return self._lr1
707 @property
708 def max_eig_stat(self):
709 """Maximum eigenvalue statistic"""
710 return self._lr2
712 @property
713 def lr2(self):
714 """Maximum eigenvalue statistic"""
715 return self._lr2
717 @property
718 def trace_stat_crit_vals(self):
719 """Critical values (90%, 95%, 99%) of trace statistic"""
720 return self._cvt
722 @property
723 def cvt(self):
724 """Critical values (90%, 95%, 99%) of trace statistic"""
725 return self._cvt
727 @property
728 def cvm(self):
729 """Critical values (90%, 95%, 99%) of maximum eigenvalue statistic."""
730 return self._cvm
732 @property
733 def max_eig_stat_crit_vals(self):
734 """Critical values (90%, 95%, 99%) of maximum eigenvalue statistic."""
735 return self._cvm
737 @property
738 def ind(self):
739 """Order of eigenvalues"""
740 return self._ind
742 @property
743 def meth(self):
744 """Test method"""
745 return self._meth
748class VECM(tsbase.TimeSeriesModel):
749 """
750 Class representing a Vector Error Correction Model (VECM).
752 A VECM(:math:`k_{ar}-1`) has the following form
754 .. math:: \\Delta y_t = \\Pi y_{t-1} + \\Gamma_1 \\Delta y_{t-1} + \\ldots + \\Gamma_{k_{ar}-1} \\Delta y_{t-k_{ar}+1} + u_t
756 where
758 .. math:: \\Pi = \\alpha \\beta'
760 as described in chapter 7 of [1]_.
762 Parameters
763 ----------
764 endog : array_like (nobs_tot x neqs)
765 2-d endogenous response variable.
766 exog : ndarray (nobs_tot x neqs) or None
767 Deterministic terms outside the cointegration relation.
768 exog_coint : ndarray (nobs_tot x neqs) or None
769 Deterministic terms inside the cointegration relation.
770 dates : array_like of datetime, optional
771 See :class:`statsmodels.tsa.base.tsa_model.TimeSeriesModel` for more
772 information.
773 freq : str, optional
774 See :class:`statsmodels.tsa.base.tsa_model.TimeSeriesModel` for more
775 information.
776 missing : str, optional
777 See :class:`statsmodels.base.model.Model` for more information.
778 k_ar_diff : int
779 Number of lagged differences in the model. Equals :math:`k_{ar} - 1` in
780 the formula above.
781 coint_rank : int
782 Cointegration rank, equals the rank of the matrix :math:`\\Pi` and the
783 number of columns of :math:`\\alpha` and :math:`\\beta`.
784 deterministic : str {``"nc"``, ``"co"``, ``"ci"``, ``"lo"``, ``"li"``}
785 * ``"nc"`` - no deterministic terms
786 * ``"co"`` - constant outside the cointegration relation
787 * ``"ci"`` - constant within the cointegration relation
788 * ``"lo"`` - linear trend outside the cointegration relation
789 * ``"li"`` - linear trend within the cointegration relation
791 Combinations of these are possible (e.g. ``"cili"`` or ``"colo"`` for
792 linear trend with intercept). When using a constant term you have to
793 choose whether you want to restrict it to the cointegration relation
794 (i.e. ``"ci"``) or leave it unrestricted (i.e. ``"co"``). Do not use
795 both ``"ci"`` and ``"co"``. The same applies for ``"li"`` and ``"lo"``
796 when using a linear term. See the Notes-section for more information.
797 seasons : int, default: 0
798 Number of periods in a seasonal cycle. 0 means no seasons.
799 first_season : int, default: 0
800 Season of the first observation.
802 Notes
803 -----
804 A VECM(:math:`k_{ar} - 1`) with deterministic terms has the form
806 .. math::
808 \\Delta y_t = \\alpha \\begin{pmatrix}\\beta' & \\eta'\\end{pmatrix} \\begin{pmatrix}y_{t-1}\\\\D^{co}_{t-1}\\end{pmatrix} + \\Gamma_1 \\Delta y_{t-1} + \\dots + \\Gamma_{k_{ar}-1} \\Delta y_{t-k_{ar}+1} + C D_t + u_t.
810 In :math:`D^{co}_{t-1}` we have the deterministic terms which are inside
811 the cointegration relation (or restricted to the cointegration relation).
812 :math:`\\eta` is the corresponding estimator. To pass a deterministic term
813 inside the cointegration relation, we can use the `exog_coint` argument.
814 For the two special cases of an intercept and a linear trend there exists
815 a simpler way to declare these terms: we can pass ``"ci"`` and ``"li"``
816 respectively to the `deterministic` argument. So for an intercept inside
817 the cointegration relation we can either pass ``"ci"`` as `deterministic`
818 or `np.ones(len(data))` as `exog_coint` if `data` is passed as the
819 `endog` argument. This ensures that :math:`D_{t-1}^{co} = 1` for all
820 :math:`t`.
822 We can also use deterministic terms outside the cointegration relation.
823 These are defined in :math:`D_t` in the formula above with the
824 corresponding estimators in the matrix :math:`C`. We specify such terms by
825 passing them to the `exog` argument. For an intercept and/or linear trend
826 we again have the possibility to use `deterministic` alternatively. For
827 an intercept we pass ``"co"`` and for a linear trend we pass ``"lo"`` where
828 the `o` stands for `outside`.
830 The following table shows the five cases considered in [2]_. The last
831 column indicates which string to pass to the `deterministic` argument for
832 each of these cases.
834 ==== =============================== =================================== =============
835 Case Intercept Slope of the linear trend `deterministic`
836 ==== =============================== =================================== =============
837 I 0 0 ``"nc"``
838 II :math:`- \\alpha \\beta^T \\mu` 0 ``"ci"``
839 III :math:`\\neq 0` 0 ``"co"``
840 IV :math:`\\neq 0` :math:`- \\alpha \\beta^T \\gamma` ``"coli"``
841 V :math:`\\neq 0` :math:`\\neq 0` ``"colo"``
842 ==== =============================== =================================== =============
844 References
845 ----------
846 .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series Analysis*. Springer.
848 .. [2] Johansen, S. 1995. *Likelihood-Based Inference in Cointegrated *
849 *Vector Autoregressive Models*. Oxford University Press.
850 """
852 def __init__(self, endog, exog=None, exog_coint=None, dates=None,
853 freq=None, missing="none", k_ar_diff=1, coint_rank=1,
854 deterministic="nc", seasons=0, first_season=0):
855 super(VECM, self).__init__(endog, exog, dates, freq,
856 missing=missing)
857 if exog_coint is not None and \
858 not exog_coint.shape[0] == endog.shape[0]:
859 raise ValueError("exog_coint must have as many rows as enodg_tot!")
860 if self.endog.ndim == 1:
861 raise ValueError("Only gave one variable to VECM")
862 self.y = self.endog.T
863 self.exog_coint = exog_coint
864 self.neqs = self.endog.shape[1]
865 self.k_ar = k_ar_diff + 1
866 self.k_ar_diff = k_ar_diff
867 self.coint_rank = coint_rank
868 self.deterministic = deterministic
869 self.seasons = seasons
870 self.first_season = first_season
871 self.load_coef_repr = "ec" # name for loading coef. (alpha) in summary
873 def fit(self, method="ml"):
874 """
875 Estimates the parameters of a VECM.
877 The estimation procedure is described on pp. 269-304 in [1]_.
879 Parameters
880 ----------
881 method : str {"ml"}, default: "ml"
882 Estimation method to use. "ml" stands for Maximum Likelihood.
884 Returns
885 -------
886 est : :class:`VECMResults`
888 References
889 ----------
890 .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series Analysis*. Springer.
891 """
892 if method == "ml":
893 return self._estimate_vecm_ml()
894 else:
895 raise ValueError("%s not recognized, must be among %s"
896 % (method, "ml"))
898 def _estimate_vecm_ml(self):
899 y_1_T, delta_y_1_T, y_lag1, delta_x = _endog_matrices(
900 self.y, self.exog, self.exog_coint, self.k_ar_diff,
901 self.deterministic, self.seasons, self.first_season)
902 T = y_1_T.shape[1]
904 s00, s01, s10, s11, s11_, _, v = _sij(delta_x, delta_y_1_T, y_lag1)
906 beta_tilde = (v[:, :self.coint_rank].T.dot(s11_)).T
907 beta_tilde = np.real_if_close(beta_tilde)
908 # normalize beta tilde such that eye(r) forms the first r rows of it:
909 beta_tilde = np.dot(beta_tilde, inv(beta_tilde[:self.coint_rank]))
910 alpha_tilde = s01.dot(beta_tilde).dot(
911 inv(beta_tilde.T.dot(s11).dot(beta_tilde)))
912 gamma_tilde = (delta_y_1_T - alpha_tilde.dot(beta_tilde.T).dot(y_lag1)
913 ).dot(delta_x.T).dot(inv(np.dot(delta_x, delta_x.T)))
914 temp = (delta_y_1_T - alpha_tilde.dot(beta_tilde.T).dot(y_lag1) -
915 gamma_tilde.dot(delta_x))
916 sigma_u_tilde = temp.dot(temp.T) / T
918 return VECMResults(self.y, self.exog, self.exog_coint, self.k_ar,
919 self.coint_rank, alpha_tilde, beta_tilde,
920 gamma_tilde, sigma_u_tilde,
921 deterministic=self.deterministic,
922 seasons=self.seasons, delta_y_1_T=delta_y_1_T,
923 y_lag1=y_lag1, delta_x=delta_x, model=self,
924 names=self.endog_names, dates=self.data.dates,
925 first_season=self.first_season)
927 @property
928 def _lagged_param_names(self):
929 """
930 Returns parameter names (for Gamma and deterministics) for the summary.
932 Returns
933 -------
934 param_names : list of str
935 Returns a list of parameter names for the lagged endogenous
936 parameters which are called :math:`\\Gamma` in [1]_
937 (see chapter 6).
938 If present in the model, also names for deterministic terms outside
939 the cointegration relation are returned. They name the elements of
940 the matrix C in [1]_ (p. 299).
942 References
943 ----------
944 .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series Analysis*. Springer.
945 """
946 param_names = []
948 # 1. Deterministic terms outside cointegration relation
949 if "co" in self.deterministic:
950 param_names += ["const.%s" % n for n in self.endog_names]
952 if self.seasons > 0:
953 param_names += ["season%d.%s" % (s, n)
954 for s in range(1, self.seasons)
955 for n in self.endog_names]
957 if "lo" in self.deterministic:
958 param_names += ["lin_trend.%s" % n for n in self.endog_names]
960 if self.exog is not None:
961 param_names += ["exog%d.%s" % (exog_no, n)
962 for exog_no in range(1, self.exog.shape[1] + 1)
963 for n in self.endog_names]
965 # 2. lagged endogenous terms
966 param_names += [
967 "L%d.%s.%s" % (i+1, n1, n2)
968 for n2 in self.endog_names
969 for i in range(self.k_ar_diff)
970 for n1 in self.endog_names]
972 return param_names
974 @property
975 def _load_coef_param_names(self):
976 """
977 Returns parameter names (for alpha) for the summary.
979 Returns
980 -------
981 param_names : list of str
982 Returns a list of parameter names for the loading coefficients
983 which are called :math:`\\alpha` in [1]_ (see chapter 6).
985 References
986 ----------
987 .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series Analysis*. Springer.
988 """
989 param_names = []
991 if self.coint_rank == 0:
992 return None
994 # loading coefficients (alpha) # called "ec" in JMulTi, "ECT" in tsDyn,
995 param_names += [ # and "_ce" in Stata
996 self.load_coef_repr + "%d.%s" % (i+1, self.endog_names[j])
997 for j in range(self.neqs)
998 for i in range(self.coint_rank)
999 ]
1001 return param_names
1003 @property
1004 def _coint_param_names(self):
1005 """
1006 Returns parameter names (for beta and deterministics) for the summary.
1008 Returns
1009 -------
1010 param_names : list of str
1011 Returns a list of parameter names for the cointegration matrix
1012 as well as deterministic terms inside the cointegration relation
1013 (if present in the model).
1014 """
1015 # 1. cointegration matrix/vector
1016 param_names = []
1018 param_names += [("beta.%d." + self.load_coef_repr + "%d") % (j+1, i+1)
1019 for i in range(self.coint_rank)
1020 for j in range(self.neqs)]
1022 # 2. deterministic terms inside cointegration relation
1023 if "ci" in self.deterministic:
1024 param_names += ["const." + self.load_coef_repr + "%d" % (i+1)
1025 for i in range(self.coint_rank)]
1027 if "li" in self.deterministic:
1028 param_names += ["lin_trend." + self.load_coef_repr + "%d" % (i+1)
1029 for i in range(self.coint_rank)]
1031 if self.exog_coint is not None:
1032 param_names += ["exog_coint%d.%s" % (n+1, exog_no)
1033 for exog_no in range(1, self.exog_coint.shape[1]+1)
1034 for n in range(self.neqs)]
1036 return param_names
1039class VECMResults(object):
1040 """Class for holding estimation related results of a vector error
1041 correction model (VECM).
1043 Parameters
1044 ----------
1045 endog : ndarray (neqs x nobs_tot)
1046 Array of observations.
1047 exog : ndarray (nobs_tot x neqs) or `None`
1048 Deterministic terms outside the cointegration relation.
1049 exog_coint : ndarray (nobs_tot x neqs) or `None`
1050 Deterministic terms inside the cointegration relation.
1051 k_ar : int, >= 1
1052 Lags in the VAR representation. This implies that the number of lags in
1053 the VEC representation (=lagged differences) equals :math:`k_{ar} - 1`.
1054 coint_rank : int, 0 <= `coint_rank` <= neqs
1055 Cointegration rank, equals the rank of the matrix :math:`\\Pi` and the
1056 number of columns of :math:`\\alpha` and :math:`\\beta`.
1057 alpha : ndarray (neqs x `coint_rank`)
1058 Estimate for the parameter :math:`\\alpha` of a VECM.
1059 beta : ndarray (neqs x `coint_rank`)
1060 Estimate for the parameter :math:`\\beta` of a VECM.
1061 gamma : ndarray (neqs x neqs*(k_ar-1))
1062 Array containing the estimates of the :math:`k_{ar}-1` parameter
1063 matrices :math:`\\Gamma_1, \\dots, \\Gamma_{k_{ar}-1}` of a
1064 VECM(:math:`k_{ar}-1`). The submatrices are stacked horizontally from
1065 left to right.
1066 sigma_u : ndarray (neqs x neqs)
1067 Estimate of white noise process covariance matrix :math:`\\Sigma_u`.
1068 deterministic : str {``"nc"``, ``"co"``, ``"ci"``, ``"lo"``, ``"li"``}
1069 * ``"nc"`` - no deterministic terms
1070 * ``"co"`` - constant outside the cointegration relation
1071 * ``"ci"`` - constant within the cointegration relation
1072 * ``"lo"`` - linear trend outside the cointegration relation
1073 * ``"li"`` - linear trend within the cointegration relation
1075 Combinations of these are possible (e.g. ``"cili"`` or ``"colo"`` for
1076 linear trend with intercept). See the docstring of the
1077 :class:`VECM`-class for more information.
1078 seasons : int, default: 0
1079 Number of periods in a seasonal cycle. 0 means no seasons.
1080 first_season : int, default: 0
1081 Season of the first observation.
1082 delta_y_1_T : ndarray or `None`, default: `None`
1083 Auxiliary array for internal computations. It will be calculated if
1084 not given as parameter.
1085 y_lag1 : ndarray or `None`, default: `None`
1086 Auxiliary array for internal computations. It will be calculated if
1087 not given as parameter.
1088 delta_x : ndarray or `None`, default: `None`
1089 Auxiliary array for internal computations. It will be calculated if
1090 not given as parameter.
1091 model : :class:`VECM`
1092 An instance of the :class:`VECM`-class.
1093 names : list of str
1094 Each str in the list represents the name of a variable of the time
1095 series.
1096 dates : array_like
1097 For example a DatetimeIndex of length nobs_tot.
1099 Attributes
1100 ----------
1101 nobs : int
1102 Number of observations (excluding the presample).
1103 model : see Parameters
1104 y_all : see `endog` in Parameters
1105 exog : see Parameters
1106 exog_coint : see Parameters
1107 names : see Parameters
1108 dates : see Parameters
1109 neqs : int
1110 Number of variables in the time series.
1111 k_ar : see Parameters
1112 deterministic : see Parameters
1113 seasons : see Parameters
1114 first_season : see Parameters
1115 alpha : see Parameters
1116 beta : see Parameters
1117 gamma : see Parameters
1118 sigma_u : see Parameters
1119 det_coef_coint : ndarray (#(determinist. terms inside the coint. rel.) x `coint_rank`)
1120 Estimated coefficients for the all deterministic terms inside the
1121 cointegration relation.
1122 const_coint : ndarray (1 x `coint_rank`)
1123 If there is a constant deterministic term inside the cointegration
1124 relation, then `const_coint` is the first row of `det_coef_coint`.
1125 Otherwise it's an ndarray of zeros.
1126 lin_trend_coint : ndarray (1 x `coint_rank`)
1127 If there is a linear deterministic term inside the cointegration
1128 relation, then `lin_trend_coint` contains the corresponding estimated
1129 coefficients. As such it represents the corresponding row of
1130 `det_coef_coint`. If there is no linear deterministic term inside
1131 the cointegration relation, then `lin_trend_coint` is an ndarray of
1132 zeros.
1133 exog_coint_coefs : ndarray (exog_coint.shape[1] x `coint_rank`) or `None`
1134 If deterministic terms inside the cointegration relation are passed via
1135 the `exog_coint` parameter, then `exog_coint_coefs` contains the
1136 corresponding estimated coefficients. As such `exog_coint_coefs`
1137 represents the last rows of `det_coef_coint`.
1138 If no deterministic terms were passed via the `exog_coint` parameter,
1139 this attribute is `None`.
1140 det_coef : ndarray (neqs x #(deterministic terms outside the coint. rel.))
1141 Estimated coefficients for the all deterministic terms outside the
1142 cointegration relation.
1143 const : ndarray (neqs x 1) or (neqs x 0)
1144 If a constant deterministic term outside the cointegration is specified
1145 within the deterministic parameter, then `const` is the first column
1146 of `det_coef_coint`. Otherwise it's an ndarray of size zero.
1147 seasonal : ndarray (neqs x seasons)
1148 If the `seasons` parameter is > 0, then seasonal contains the
1149 estimated coefficients corresponding to the seasonal terms. Otherwise
1150 it's an ndarray of size zero.
1151 lin_trend : ndarray (neqs x 1) or (neqs x 0)
1152 If a linear deterministic term outside the cointegration is specified
1153 within the deterministic parameter, then `lin_trend` contains the
1154 corresponding estimated coefficients. As such it represents the
1155 corresponding column of `det_coef_coint`. If there is no linear
1156 deterministic term outside the cointegration relation, then
1157 `lin_trend` is an ndarray of size zero.
1158 exog_coefs : ndarray (neqs x exog_coefs.shape[1])
1159 If deterministic terms outside the cointegration relation are passed
1160 via the `exog` parameter, then `exog_coefs` contains the
1161 corresponding estimated coefficients. As such `exog_coefs` represents
1162 the last columns of `det_coef`.
1163 If no deterministic terms were passed via the `exog` parameter, this
1164 attribute is an ndarray of size zero.
1165 _delta_y_1_T : see delta_y_1_T in Parameters
1166 _y_lag1 : see y_lag1 in Parameters
1167 _delta_x : see delta_x in Parameters
1168 coint_rank : int
1169 Cointegration rank, equals the rank of the matrix :math:`\\Pi` and the
1170 number of columns of :math:`\\alpha` and :math:`\\beta`.
1171 llf : float
1172 The model's log-likelihood.
1173 cov_params : ndarray (d x d)
1174 Covariance matrix of the parameters. The number of rows and columns, d
1175 (used in the dimension specification of this argument),
1176 is equal to neqs * (neqs+num_det_coef_coint + neqs*(k_ar-1)+number of
1177 deterministic dummy variables outside the cointegration relation). For
1178 the case with no deterministic terms this matrix is defined on p. 287
1179 in [1]_ as :math:`\\Sigma_{co}` and its relationship to the
1180 ML-estimators can be seen in eq. (7.2.21) on p. 296 in [1]_.
1181 cov_params_wo_det : ndarray
1182 Covariance matrix of the parameters
1183 :math:`\\tilde{\\Pi}, \\tilde{\\Gamma}` where
1184 :math:`\\tilde{\\Pi} = \\tilde{\\alpha} \\tilde{\\beta'}`.
1185 Equals `cov_params` without the rows and columns related to
1186 deterministic terms. This matrix is defined as :math:`\\Sigma_{co}` on
1187 p. 287 in [1]_.
1188 stderr_params : ndarray (d)
1189 Array containing the standard errors of :math:`\\Pi`, :math:`\\Gamma`,
1190 and estimated parameters related to deterministic terms.
1191 stderr_coint : ndarray (neqs+num_det_coef_coint x `coint_rank`)
1192 Array containing the standard errors of :math:`\\beta` and estimated
1193 parameters related to deterministic terms inside the cointegration
1194 relation.
1195 stderr_alpha : ndarray (neqs x `coint_rank`)
1196 The standard errors of :math:`\\alpha`.
1197 stderr_beta : ndarray (neqs x `coint_rank`)
1198 The standard errors of :math:`\\beta`.
1199 stderr_det_coef_coint : ndarray (num_det_coef_coint x `coint_rank`)
1200 The standard errors of estimated the parameters related to
1201 deterministic terms inside the cointegration relation.
1202 stderr_gamma : ndarray (neqs x neqs*(k_ar-1))
1203 The standard errors of :math:`\\Gamma_1, \\ldots, \\Gamma_{k_{ar}-1}`.
1204 stderr_det_coef : ndarray (neqs x det. terms outside the coint. relation)
1205 The standard errors of estimated the parameters related to
1206 deterministic terms outside the cointegration relation.
1207 tvalues_alpha : ndarray (neqs x `coint_rank`)
1208 tvalues_beta : ndarray (neqs x `coint_rank`)
1209 tvalues_det_coef_coint : ndarray (num_det_coef_coint x `coint_rank`)
1210 tvalues_gamma : ndarray (neqs x neqs*(k_ar-1))
1211 tvalues_det_coef : ndarray (neqs x det. terms outside the coint. relation)
1212 pvalues_alpha : ndarray (neqs x `coint_rank`)
1213 pvalues_beta : ndarray (neqs x `coint_rank`)
1214 pvalues_det_coef_coint : ndarray (num_det_coef_coint x `coint_rank`)
1215 pvalues_gamma : ndarray (neqs x neqs*(k_ar-1))
1216 pvalues_det_coef : ndarray (neqs x det. terms outside the coint. relation)
1217 var_rep : (k_ar x neqs x neqs)
1218 KxK parameter matrices :math:`A_i` of the corresponding VAR
1219 representation. If the return value is assigned to a variable ``A``,
1220 these matrices can be accessed via ``A[i]`` for
1221 :math:`i=0, \\ldots, k_{ar}-1`.
1222 cov_var_repr : ndarray (neqs**2 * k_ar x neqs**2 * k_ar)
1223 This matrix is called :math:`\\Sigma^{co}_{\\alpha}` on p. 289 in [1]_.
1224 It is needed e.g. for impulse-response-analysis.
1225 fittedvalues : ndarray (nobs x neqs)
1226 The predicted in-sample values of the models' endogenous variables.
1227 resid : ndarray (nobs x neqs)
1228 The residuals.
1230 References
1231 ----------
1232 .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series Analysis*. Springer.
1233 """
1235 def __init__(self, endog, exog, exog_coint, k_ar,
1236 coint_rank, alpha, beta, gamma, sigma_u, deterministic='nc',
1237 seasons=0, first_season=0, delta_y_1_T=None, y_lag1=None,
1238 delta_x=None, model=None, names=None, dates=None):
1239 self.model = model
1240 self.y_all = endog
1241 self.exog = exog
1242 self.exog_coint = exog_coint
1243 self.names = names
1244 self.dates = dates
1245 self.neqs = endog.shape[0]
1246 self.k_ar = k_ar
1247 self.deterministic = deterministic
1248 self.seasons = seasons
1249 self.first_season = first_season
1251 self.coint_rank = coint_rank
1252 if alpha.dtype == np.complex128 and np.all(np.imag(alpha) == 0):
1253 alpha = np.real_if_close(alpha)
1254 if beta.dtype == np.complex128 and np.all(np.imag(beta) == 0):
1255 beta = np.real_if_close(beta)
1256 if gamma.dtype == np.complex128 and np.all(np.imag(gamma) == 0):
1257 gamma = np.real_if_close(gamma)
1259 self.alpha = alpha
1260 self.beta, self.det_coef_coint = np.vsplit(beta, [self.neqs])
1261 self.gamma, self.det_coef = np.hsplit(gamma,
1262 [self.neqs * (self.k_ar - 1)])
1264 if "ci" in deterministic:
1265 self.const_coint = self.det_coef_coint[:1, :]
1266 else:
1267 self.const_coint = np.zeros(coint_rank).reshape((1, -1))
1268 if "li" in deterministic:
1269 start = 1 if "ci" in deterministic else 0
1270 self.lin_trend_coint = self.det_coef_coint[start:start+1, :]
1271 else:
1272 self.lin_trend_coint = np.zeros(coint_rank).reshape(1, -1)
1273 if self.exog_coint is not None:
1274 start = ("ci" in deterministic) + ("li" in deterministic)
1275 self.exog_coint_coefs = self.det_coef_coint[start:, :]
1276 else:
1277 self.exog_coint_coefs = None
1279 split_const_season = 1 if "co" in deterministic else 0
1280 split_season_lin = split_const_season + ((seasons-1) if seasons else 0)
1281 if "lo" in deterministic:
1282 split_lin_exog = split_season_lin + 1
1283 else:
1284 split_lin_exog = split_season_lin
1285 self.const, self.seasonal, self.lin_trend, self.exog_coefs = \
1286 np.hsplit(self.det_coef,
1287 [split_const_season, split_season_lin, split_lin_exog])
1289 self.sigma_u = sigma_u
1291 if y_lag1 is not None and delta_x is not None \
1292 and delta_y_1_T is not None:
1293 self._delta_y_1_T = delta_y_1_T
1294 self._y_lag1 = y_lag1
1295 self._delta_x = delta_x
1296 else:
1297 _y_1_T, self._delta_y_1_T, self._y_lag1, self._delta_x = \
1298 _endog_matrices(endog, self.exog, k_ar,
1299 deterministic, seasons)
1300 self.nobs = self._y_lag1.shape[1]
1302 @cache_readonly
1303 def llf(self): # Lutkepohl p. 295 (7.2.20)
1304 """
1305 Compute the VECM's loglikelihood.
1306 """
1307 K = self.neqs
1308 T = self.nobs
1309 r = self.coint_rank
1310 s00, _, _, _, _, lambd, _ = _sij(self._delta_x, self._delta_y_1_T,
1311 self._y_lag1)
1312 return - K * T * np.log(2*np.pi) / 2 \
1313 - T * (np.log(np.linalg.det(s00)) + sum(np.log(1-lambd)[:r])) / 2 \
1314 - K * T / 2
1316 @cache_readonly
1317 def _cov_sigma(self):
1318 sigma_u = self.sigma_u
1319 d = duplication_matrix(self.neqs)
1320 d_K_plus = np.linalg.pinv(d)
1321 # compare p. 93, 297 Lutkepohl (2005)
1322 return 2 * (d_K_plus @ np.kron(sigma_u, sigma_u) @ d_K_plus.T)
1324 @cache_readonly
1325 def cov_params_default(self): # p.296 (7.2.21)
1326 # Sigma_co described on p. 287
1327 beta = self.beta
1328 if self.det_coef_coint.size > 0:
1329 beta = vstack((beta, self.det_coef_coint))
1330 dt = self.deterministic
1331 num_det = ("co" in dt) + ("lo" in dt)
1332 num_det += (self.seasons-1) if self.seasons else 0
1333 if self.exog is not None:
1334 num_det += self.exog.shape[1]
1335 b_id = scipy.linalg.block_diag(beta,
1336 np.identity(self.neqs * (self.k_ar-1) +
1337 num_det))
1339 y_lag1 = self._y_lag1
1340 b_y = beta.T.dot(y_lag1)
1341 omega11 = b_y.dot(b_y.T)
1342 omega12 = b_y.dot(self._delta_x.T)
1343 omega21 = omega12.T
1344 omega22 = self._delta_x.dot(self._delta_x.T)
1345 omega = np.bmat([[omega11, omega12],
1346 [omega21, omega22]]).A
1348 mat1 = b_id.dot(inv(omega)).dot(b_id.T)
1349 return np.kron(mat1, self.sigma_u)
1351 @cache_readonly
1352 def cov_params_wo_det(self):
1353 # rows & cols to be dropped (related to deterministic terms inside the
1354 # cointegration relation)
1355 start_i = self.neqs**2 # first elements belong to alpha @ beta.T
1356 end_i = start_i + self.neqs * self.det_coef_coint.shape[0]
1357 to_drop_i = np.arange(start_i, end_i)
1359 # rows & cols to be dropped (related to deterministic terms outside of
1360 # the cointegration relation)
1361 cov = self.cov_params_default
1362 cov_size = len(cov)
1363 to_drop_o = np.arange(cov_size-self.det_coef.size, cov_size)
1365 to_drop = np.union1d(to_drop_i, to_drop_o)
1367 mask = np.ones(cov.shape, dtype=bool)
1368 mask[to_drop] = False
1369 mask[:, to_drop] = False
1370 cov_size_new = mask.sum(axis=0)[0]
1371 return cov[mask].reshape((cov_size_new, cov_size_new))
1373 # standard errors:
1374 @cache_readonly
1375 def stderr_params(self):
1376 return np.sqrt(np.diag(self.cov_params_default))
1378 @cache_readonly
1379 def stderr_coint(self):
1380 """
1381 Standard errors of beta and deterministic terms inside the
1382 cointegration relation.
1384 Notes
1385 -----
1386 See p. 297 in [1]_. Using the rule
1388 .. math::
1390 vec(B R) = (B' \\otimes I) vec(R)
1392 for two matrices B and R which are compatible for multiplication.
1393 This is rule (3) on p. 662 in [1]_.
1395 References
1396 ----------
1397 .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series Analysis*. Springer.
1398 """
1399 r = self.coint_rank
1400 _, r1 = _r_matrices(self._delta_y_1_T, self._y_lag1, self._delta_x)
1401 r12 = r1[r:]
1402 if r12.size == 0:
1403 return np.zeros((r, r))
1404 mat1 = inv(r12.dot(r12.T))
1405 mat1 = np.kron(mat1.T, np.identity(r))
1406 det = self.det_coef_coint.shape[0]
1407 mat2 = np.kron(np.identity(self.neqs-r+det),
1408 inv(self.alpha.T @ inv(self.sigma_u) @ self.alpha))
1409 first_rows = np.zeros((r, r))
1410 last_rows_1d = np.sqrt(np.diag(mat1.dot(mat2)))
1411 last_rows = last_rows_1d.reshape((self.neqs-r+det, r),
1412 order="F")
1413 return vstack((first_rows,
1414 last_rows))
1416 @cache_readonly
1417 def stderr_alpha(self):
1418 ret_1dim = self.stderr_params[:self.alpha.size]
1419 return ret_1dim.reshape(self.alpha.shape, order="F")
1421 @cache_readonly
1422 def stderr_beta(self):
1423 ret_1dim = self.stderr_coint[:self.beta.shape[0]]
1424 return ret_1dim.reshape(self.beta.shape, order="F")
1426 @cache_readonly
1427 def stderr_det_coef_coint(self):
1428 if self.det_coef_coint.size == 0:
1429 return self.det_coef_coint # 0-size array
1430 ret_1dim = self.stderr_coint[self.beta.shape[0]:]
1431 return ret_1dim.reshape(self.det_coef_coint.shape, order="F")
1433 @cache_readonly
1434 def stderr_gamma(self):
1435 start = self.alpha.shape[0] * (self.beta.shape[0] +
1436 self.det_coef_coint.shape[0])
1437 ret_1dim = self.stderr_params[start:start+self.gamma.size]
1438 return ret_1dim.reshape(self.gamma.shape, order="F")
1440 @cache_readonly
1441 def stderr_det_coef(self):
1442 if self.det_coef.size == 0:
1443 return self.det_coef # 0-size array
1444 ret1_1dim = self.stderr_params[-self.det_coef.size:]
1445 return ret1_1dim.reshape(self.det_coef.shape, order="F")
1447 # t-values:
1448 @cache_readonly
1449 def tvalues_alpha(self):
1450 return self.alpha / self.stderr_alpha
1452 @cache_readonly
1453 def tvalues_beta(self):
1454 r = self.coint_rank
1455 first_rows = np.zeros((r, r))
1456 last_rows = self.beta[r:] / self.stderr_beta[r:]
1457 return vstack((first_rows,
1458 last_rows))
1460 @cache_readonly
1461 def tvalues_det_coef_coint(self):
1462 if self.det_coef_coint.size == 0:
1463 return self.det_coef_coint # 0-size array
1464 return self.det_coef_coint / self.stderr_det_coef_coint
1466 @cache_readonly
1467 def tvalues_gamma(self):
1468 return self.gamma / self.stderr_gamma
1470 @cache_readonly
1471 def tvalues_det_coef(self):
1472 if self.det_coef.size == 0:
1473 return self.det_coef # 0-size array
1474 return self.det_coef / self.stderr_det_coef
1476 # p-values:
1477 @cache_readonly
1478 def pvalues_alpha(self):
1479 return (1-scipy.stats.norm.cdf(abs(self.tvalues_alpha))) * 2
1481 @cache_readonly
1482 def pvalues_beta(self):
1483 first_rows = np.zeros((self.coint_rank, self.coint_rank))
1484 tval_last = self.tvalues_beta[self.coint_rank:]
1485 last_rows = (1-scipy.stats.norm.cdf(abs(tval_last))) * 2 # student-t
1486 return vstack((first_rows,
1487 last_rows))
1489 @cache_readonly
1490 def pvalues_det_coef_coint(self):
1491 if self.det_coef_coint.size == 0:
1492 return self.det_coef_coint # 0-size array
1493 return (1-scipy.stats.norm.cdf(abs(self.tvalues_det_coef_coint))) * 2
1495 @cache_readonly
1496 def pvalues_gamma(self):
1497 return (1-scipy.stats.norm.cdf(abs(self.tvalues_gamma))) * 2
1499 @cache_readonly
1500 def pvalues_det_coef(self):
1501 if self.det_coef.size == 0:
1502 return self.det_coef # 0-size array
1503 return (1-scipy.stats.norm.cdf(abs(self.tvalues_det_coef))) * 2
1505 # confidence intervals
1506 def _make_conf_int(self, est, stderr, alpha):
1507 struct_arr = np.zeros(est.shape, dtype=[("lower", float),
1508 ("upper", float)])
1509 struct_arr["lower"] = est - scipy.stats.norm.ppf(1 - alpha/2) * stderr
1510 struct_arr["upper"] = est + scipy.stats.norm.ppf(1 - alpha/2) * stderr
1511 return struct_arr
1513 def conf_int_alpha(self, alpha=0.05):
1514 return self._make_conf_int(self.alpha, self.stderr_alpha, alpha)
1516 def conf_int_beta(self, alpha=0.05):
1517 return self._make_conf_int(self.beta, self.stderr_beta, alpha)
1519 def conf_int_det_coef_coint(self, alpha=0.05):
1520 return self._make_conf_int(self.det_coef_coint,
1521 self.stderr_det_coef_coint, alpha)
1523 def conf_int_gamma(self, alpha=0.05):
1524 return self._make_conf_int(self.gamma, self.stderr_gamma, alpha)
1526 def conf_int_det_coef(self, alpha=0.05):
1527 return self._make_conf_int(self.det_coef, self.stderr_det_coef, alpha)
1529 @cache_readonly
1530 def var_rep(self):
1531 pi = self.alpha.dot(self.beta.T)
1532 gamma = self.gamma
1533 K = self.neqs
1534 A = np.zeros((self.k_ar, K, K))
1535 A[0] = pi + np.identity(K)
1536 if self.gamma.size > 0:
1537 A[0] += gamma[:, :K]
1538 A[self.k_ar-1] = - gamma[:, K*(self.k_ar-2):]
1539 for i in range(1, self.k_ar-1):
1540 A[i] = gamma[:, K*i:K*(i+1)] - gamma[:, K*(i-1):K*i]
1541 return A
1543 @cache_readonly
1544 def cov_var_repr(self):
1545 """
1546 Gives the covariance matrix of the corresponding VAR-representation.
1548 More precisely, the covariance matrix of the vector consisting of the
1549 columns of the corresponding VAR coefficient matrices (i.e.
1550 vec(self.var_rep)).
1552 Returns
1553 -------
1554 cov : array (neqs**2 * k_ar x neqs**2 * k_ar)
1555 """
1556 # This implementation is using the fact that for a random variable x
1557 # with covariance matrix Sigma_x the following holds:
1558 # B @ x with B being a suitably sized matrix has the covariance matrix
1559 # B @ Sigma_x @ B.T. The arrays called vecm_var_transformation and
1560 # self.cov_params_wo_det in the code play the roles of B and Sigma_x
1561 # respectively. The elements of the random variable x are the elements
1562 # of the estimated matrices Pi (alpha @ beta.T) and Gamma.
1563 # Alternatively the following code (commented out) would yield the same
1564 # result (following p. 289 in Lutkepohl):
1565 # K, p = self.neqs, self.k_ar
1566 # w = np.identity(K * p)
1567 # w[np.arange(K, len(w)), np.arange(K, len(w))] *= (-1)
1568 # w[np.arange(K, len(w)), np.arange(len(w)-K)] = 1
1569 #
1570 # w_eye = np.kron(w, np.identity(K))
1571 #
1572 # return w_eye.T @ self.cov_params_default @ w_eye
1574 if self.k_ar - 1 == 0:
1575 return self.cov_params_wo_det
1577 vecm_var_transformation = np.zeros((self.neqs**2 * self.k_ar,
1578 self.neqs**2 * self.k_ar))
1579 eye = np.identity(self.neqs**2)
1580 # for A_1:
1581 vecm_var_transformation[:self.neqs**2, :2*self.neqs**2] = hstack(
1582 (eye, eye))
1583 # for A_i, where i = 2, ..., k_ar-1
1584 for i in range(2, self.k_ar):
1585 start_row = self.neqs**2 + (i-2) * self.neqs**2
1586 start_col = self.neqs**2 + (i-2) * self.neqs**2
1587 vecm_var_transformation[start_row:start_row+self.neqs**2,
1588 start_col:start_col+2*self.neqs**2] = hstack((-eye, eye))
1589 # for A_p:
1590 vecm_var_transformation[-self.neqs**2:, -self.neqs**2:] = -eye
1591 vvt = vecm_var_transformation
1592 return vvt @ self.cov_params_wo_det @ vvt.T
1594 def ma_rep(self, maxn=10):
1595 return ma_rep(self.var_rep, maxn)
1597 @cache_readonly
1598 def _chol_sigma_u(self):
1599 return np.linalg.cholesky(self.sigma_u)
1601 def orth_ma_rep(self, maxn=10, P=None):
1602 """Compute orthogonalized MA coefficient matrices.
1604 For this purpose a matrix P is used which fulfills
1605 :math:`\\Sigma_u = PP^\\prime`. P defaults to the Cholesky
1606 decomposition of :math:`\\Sigma_u`
1608 Parameters
1609 ----------
1610 maxn : int
1611 Number of coefficient matrices to compute
1612 P : ndarray (neqs x neqs), optional
1613 Matrix such that :math:`\\Sigma_u = PP'`. Defaults to Cholesky
1614 decomposition.
1616 Returns
1617 -------
1618 coefs : ndarray (maxn x neqs x neqs)
1619 """
1620 return orth_ma_rep(self, maxn, P)
1622 def predict(self, steps=5, alpha=None, exog_fc=None, exog_coint_fc=None):
1623 """
1624 Calculate future values of the time series.
1626 Parameters
1627 ----------
1628 steps : int
1629 Prediction horizon.
1630 alpha : float, 0 < `alpha` < 1 or None
1631 If None, compute point forecast only.
1632 If float, compute confidence intervals too. In this case the
1633 argument stands for the confidence level.
1634 exog : ndarray (steps x self.exog.shape[1])
1635 If self.exog is not None, then information about the future values
1636 of exog have to be passed via this parameter. The ndarray may be
1637 larger in it's first dimension. In this case only the first steps
1638 rows will be considered.
1640 Returns
1641 -------
1642 forecast - ndarray (steps x neqs) or three ndarrays
1643 In case of a point forecast: each row of the returned ndarray
1644 represents the forecast of the neqs variables for a specific
1645 period. The first row (index [0]) is the forecast for the next
1646 period, the last row (index [steps-1]) is the steps-periods-ahead-
1647 forecast.
1648 """
1649 if self.exog is not None and exog_fc is None:
1650 raise ValueError("exog_fc is None: Please pass the future values "
1651 "of the VECM's exog terms via the exog_fc "
1652 "argument!")
1653 if self.exog is None and exog_fc is not None:
1654 raise ValueError("This VECMResult-instance's exog attribute is "
1655 "None. Please do not pass a non-None value as the "
1656 "method's exog_fc-argument.")
1657 if exog_fc is not None and exog_fc.shape[0] < steps:
1658 raise ValueError("The argument exog_fc must have at least steps "
1659 "elements in its first dimension")
1661 if self.exog_coint is not None and exog_coint_fc is None:
1662 raise ValueError("exog_coint_fc is None: Please pass the future "
1663 "values of the VECM's exog_coint terms via the "
1664 "exog_coint_fc argument!")
1665 if self.exog_coint is None and exog_coint_fc is not None:
1666 raise ValueError("This VECMResult-instance's exog_coint attribute "
1667 "is None. Please do not pass a non-None value as "
1668 "the method's exog_coint_fc-argument.")
1669 if exog_coint_fc is not None and exog_coint_fc.shape[0] < steps - 1:
1670 raise ValueError("The argument exog_coint_fc must have at least "
1671 "steps elements in its first dimension")
1673 last_observations = self.y_all.T[-self.k_ar:]
1674 exog = []
1675 trend_coefs = []
1677 # adding deterministic terms outside cointegration relation
1678 exog_const = np.ones(steps)
1679 nobs_tot = self.nobs + self.k_ar
1680 if self.const.size > 0:
1681 exog.append(exog_const)
1682 trend_coefs.append(self.const.T)
1684 if self.seasons > 0:
1685 first_future_season = (self.first_season + nobs_tot) % self.seasons
1686 exog_seasonal = seasonal_dummies(self.seasons, steps,
1687 first_future_season, True)
1688 exog.append(exog_seasonal)
1689 trend_coefs.append(self.seasonal.T)
1691 exog_lin_trend = _linear_trend(self.nobs, self.k_ar)
1692 exog_lin_trend = exog_lin_trend[-1] + 1 + np.arange(steps)
1693 if self.lin_trend.size > 0:
1694 exog.append(exog_lin_trend)
1695 trend_coefs.append(self.lin_trend.T)
1697 if exog_fc is not None:
1698 exog.append(exog_fc[:steps])
1699 trend_coefs.append(self.exog_coefs.T)
1701 # adding deterministic terms inside cointegration relation
1702 if "ci" in self.deterministic:
1703 exog.append(exog_const)
1704 trend_coefs.append(self.alpha.dot(self.const_coint.T).T)
1705 exog_lin_trend_coint = _linear_trend(self.nobs, self.k_ar, coint=True)
1706 exog_lin_trend_coint = exog_lin_trend_coint[-1] + 1 + np.arange(steps)
1707 if "li" in self.deterministic:
1708 exog.append(exog_lin_trend_coint)
1709 trend_coefs.append(self.alpha.dot(self.lin_trend_coint.T).T)
1711 if exog_coint_fc is not None:
1712 if exog_coint_fc.ndim == 1:
1713 exog_coint_fc = exog_coint_fc[:, None] # make 2-D
1714 exog_coint_fc = np.vstack((self.exog_coint[-1:],
1715 exog_coint_fc[:steps-1]))
1716 exog.append(exog_coint_fc)
1717 trend_coefs.append(self.alpha.dot(self.exog_coint_coefs.T).T)
1719 # glueing all deterministics together
1720 exog = np.column_stack(exog) if exog != [] else None
1721 if trend_coefs != []:
1722 trend_coefs = np.row_stack(trend_coefs)
1723 else:
1724 trend_coefs = None
1726 # call the forecasting function of the VAR-module
1727 if alpha is not None:
1728 return forecast_interval(last_observations, self.var_rep,
1729 trend_coefs, self.sigma_u, steps,
1730 alpha=alpha,
1731 exog=exog)
1732 else:
1733 return forecast(last_observations, self.var_rep, trend_coefs,
1734 steps, exog)
1736 def plot_forecast(self, steps, alpha=0.05, plot_conf_int=True,
1737 n_last_obs=None):
1738 """
1739 Plot the forecast.
1741 Parameters
1742 ----------
1743 steps : int
1744 Prediction horizon.
1745 alpha : float, 0 < `alpha` < 1
1746 The confidence level.
1747 plot_conf_int : bool, default: True
1748 If True, plot bounds of confidence intervals.
1749 n_last_obs : int or None, default: None
1750 If int, restrict plotted history to n_last_obs observations.
1751 If None, include the whole history in the plot.
1752 """
1753 mid, lower, upper = self.predict(steps, alpha=alpha)
1755 y = self.y_all.T
1756 y = y[self.k_ar:] if n_last_obs is None else y[-n_last_obs:]
1757 plot.plot_var_forc(y, mid, lower, upper, names=self.names,
1758 plot_stderr=plot_conf_int,
1759 legend_options={"loc": "lower left"})
1761 def test_granger_causality(self, caused, causing=None, signif=0.05):
1762 r"""
1763 Test for Granger-causality.
1765 The concept of Granger-causality is described in chapter 7.6.3 of [1]_.
1766 Test |H0|: "The variables in `causing` do not Granger-cause those in
1767 `caused`" against |H1|: "`causing` is Granger-causal for
1768 `caused`".
1770 Parameters
1771 ----------
1772 caused : int or str or sequence of int or str
1773 If int or str, test whether the variable specified via this index
1774 (int) or name (str) is Granger-caused by the variable(s) specified
1775 by `causing`.
1776 If a sequence of int or str, test whether the corresponding
1777 variables are Granger-caused by the variable(s) specified
1778 by `causing`.
1779 causing : int or str or sequence of int or str or `None`, default: `None`
1780 If int or str, test whether the variable specified via this index
1781 (int) or name (str) is Granger-causing the variable(s) specified by
1782 `caused`.
1783 If a sequence of int or str, test whether the corresponding
1784 variables are Granger-causing the variable(s) specified by
1785 `caused`.
1786 If `None`, `causing` is assumed to be the complement of
1787 `caused` (the remaining variables of the system).
1788 signif : float, 0 < `signif` < 1, default 5 %
1789 Significance level for computing critical values for test,
1790 defaulting to standard 0.05 level.
1792 Returns
1793 -------
1794 results : :class:`statsmodels.tsa.vector_ar.hypothesis_test_results.CausalityTestResults`
1796 References
1797 ----------
1798 .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series Analysis*. Springer.
1800 .. |H0| replace:: H\ :sub:`0`
1802 .. |H1| replace:: H\ :sub:`1`
1803 """
1804 if not (0 < signif < 1):
1805 raise ValueError("signif has to be between 0 and 1")
1807 allowed_types = (str, int)
1809 if isinstance(caused, allowed_types):
1810 caused = [caused]
1811 if not all(isinstance(c, allowed_types) for c in caused):
1812 raise TypeError("caused has to be of type string or int (or a "
1813 "sequence of these types).")
1814 caused = [self.names[c] if type(c) == int else c for c in caused]
1815 caused_ind = [get_index(self.names, c) for c in caused]
1817 if causing is not None:
1819 if isinstance(causing, allowed_types):
1820 causing = [causing]
1821 if not all(isinstance(c, allowed_types) for c in causing):
1822 raise TypeError("causing has to be of type string or int (or "
1823 "a sequence of these types) or None.")
1824 causing = [self.names[c] if type(c) == int else c for c in causing]
1825 causing_ind = [get_index(self.names, c) for c in causing]
1827 if causing is None:
1828 causing_ind = [i for i in range(self.neqs) if i not in caused_ind]
1829 causing = [self.names[c] for c in causing_ind]
1831 y, k, t, p = self.y_all, self.neqs, self.nobs - 1, self.k_ar + 1
1832 exog = _deterministic_to_exog(self.deterministic, self.seasons,
1833 nobs_tot=self.nobs + self.k_ar,
1834 first_season=self.first_season,
1835 seasons_centered=True, exog=self.exog,
1836 exog_coint=self.exog_coint)
1837 var_results = VAR(y.T, exog).fit(maxlags=p, trend="n")
1839 # num_restr is called N in Lutkepohl
1840 num_restr = len(causing) * len(caused) * (p - 1)
1841 num_det_terms = _num_det_vars(self.deterministic, self.seasons)
1842 if self.exog is not None:
1843 num_det_terms += self.exog.shape[1]
1844 if self.exog_coint is not None:
1845 num_det_terms += self.exog_coint.shape[1]
1847 # Make restriction matrix
1848 C = np.zeros((num_restr, k*num_det_terms + k**2 * (p-1)), dtype=float)
1849 cols_det = k * num_det_terms
1850 row = 0
1851 for j in range(p-1):
1852 for ing_ind in causing_ind:
1853 for ed_ind in caused_ind:
1854 C[row, cols_det + ed_ind + k * ing_ind + k**2 * j] = 1
1855 row += 1
1856 Ca = np.dot(C, vec(var_results.params[:-k].T))
1858 x_min_p_components = []
1859 if exog is not None:
1860 x_min_p_components.append(exog[-t:].T)
1862 x_min_p = np.zeros((k * p, t))
1863 for i in range(p-1): # fll first k * k_ar rows of x_min_p
1864 x_min_p[i*k:(i+1)*k, :] = y[:, p-1-i:-1-i] - y[:, :-p]
1865 x_min_p[-k:, :] = y[:, :-p] # fill last rows of x_min_p
1866 x_min_p_components.append(x_min_p)
1868 x_min_p = np.row_stack(x_min_p_components)
1869 x_x = np.dot(x_min_p, x_min_p.T) # k*k_ar x k*k_ar
1870 x_x_11 = inv(x_x)[:k*(p-1) + num_det_terms,
1871 :k*(p-1) + num_det_terms] # k*(k_ar-1) x k*(k_ar-1)
1872 # For VAR-models with parameter restrictions the denominator in the
1873 # calculation of sigma_u is nobs and not (nobs-k*k_ar-num_det_terms).
1874 # Testing for Granger-causality means testing for restricted
1875 # parameters, thus the former of the two denominators is used. As
1876 # Lutkepohl states, both variants of the estimated sigma_u are
1877 # possible. (see Lutkepohl, p.198)
1878 # The choice of the denominator T has also the advantage of getting the
1879 # same results as the reference software JMulTi.
1880 sigma_u = var_results.sigma_u * (t-k*p-num_det_terms) / t
1881 sig_alpha_min_p = t * np.kron(x_x_11, sigma_u) # k**2*(p-1)xk**2*(p-1)
1882 middle = inv(C @ sig_alpha_min_p @ C.T)
1884 wald_statistic = t * (Ca.T @ middle @ Ca)
1885 f_statistic = wald_statistic / num_restr
1886 df = (num_restr, k * var_results.df_resid)
1887 f_distribution = scipy.stats.f(*df)
1889 pvalue = f_distribution.sf(f_statistic)
1890 crit_value = f_distribution.ppf(1 - signif)
1891 return CausalityTestResults(causing, caused, f_statistic, crit_value,
1892 pvalue, df, signif, test="granger",
1893 method="f")
1895 def test_inst_causality(self, causing, signif=0.05):
1896 r"""
1897 Test for instantaneous causality.
1899 The concept of instantaneous causality is described in chapters 3.6.3
1900 and 7.6.4 of [1]_. Test |H0|: "No instantaneous causality between the
1901 variables in `caused` and those in `causing`" against |H1|:
1902 "Instantaneous causality between `caused` and `causing` exists".
1903 Note that instantaneous causality is a symmetric relation
1904 (i.e. if `causing` is "instantaneously causing" `caused`, then also
1905 `caused` is "instantaneously causing" `causing`), thus the naming of
1906 the parameters (which is chosen to be in accordance with
1907 :meth:`test_granger_causality()`) may be misleading.
1909 Parameters
1910 ----------
1911 causing : int or str or sequence of int or str
1912 If int or str, test whether the corresponding variable is causing
1913 the variable(s) specified in caused.
1914 If sequence of int or str, test whether the corresponding variables
1915 are causing the variable(s) specified in caused.
1916 signif : float, 0 < `signif` < 1, default 5 %
1917 Significance level for computing critical values for test,
1918 defaulting to standard 0.05 level.
1920 Returns
1921 -------
1922 results : :class:`statsmodels.tsa.vector_ar.hypothesis_test_results.CausalityTestResults`
1924 Notes
1925 -----
1926 This method is not returning the same result as `JMulTi`. This is
1927 because the test is based on a VAR(k_ar) model in `statsmodels` (in
1928 accordance to pp. 104, 320-321 in [1]_) whereas `JMulTi` seems to be
1929 using a VAR(k_ar+1) model. Reducing the lag order by one in `JMulTi`
1930 leads to equal results in `statsmodels` and `JMulTi`.
1932 References
1933 ----------
1934 .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series Analysis*. Springer.
1936 .. |H0| replace:: H\ :sub:`0`
1938 .. |H1| replace:: H\ :sub:`1`
1939 """
1940 exog = _deterministic_to_exog(self.deterministic, self.seasons,
1941 nobs_tot=self.nobs + self.k_ar,
1942 first_season=self.first_season,
1943 seasons_centered=True, exog=self.exog,
1944 exog_coint=self.exog_coint)
1946 # Note: JMulTi seems to be using k_ar+1 instead of k_ar
1947 k, t, p = self.neqs, self.nobs, self.k_ar
1948 # fit with trend "nc" because all trend information is already in exog
1949 var_results = VAR(self.y_all.T, exog).fit(maxlags=p, trend="n")
1950 var_results._results.names = self.names
1951 return var_results.test_inst_causality(causing=causing, signif=signif)
1953 def irf(self, periods=10):
1954 return irf.IRAnalysis(self, periods=periods, vecm=True)
1956 @cache_readonly
1957 def fittedvalues(self):
1958 """
1959 Return the in-sample values of endog calculated by the model.
1961 Returns
1962 -------
1963 fitted : array (nobs x neqs)
1964 The predicted in-sample values of the models' endogenous variables.
1965 """
1966 beta = self.beta
1967 if self.det_coef_coint.size > 0:
1968 beta = vstack((beta, self.det_coef_coint))
1969 pi = np.dot(self.alpha, beta.T)
1971 gamma = self.gamma
1972 if self.det_coef.size > 0:
1973 gamma = hstack((gamma, self.det_coef))
1974 delta_y = np.dot(pi, self._y_lag1) + np.dot(gamma, self._delta_x)
1975 return (delta_y + self._y_lag1[:self.neqs]).T
1977 @cache_readonly
1978 def resid(self):
1979 """
1980 Return the difference between observed and fitted values.
1982 Returns
1983 -------
1984 resid : array (nobs x neqs)
1985 The residuals.
1986 """
1987 return self.y_all.T[self.k_ar:] - self.fittedvalues
1989 def test_normality(self, signif=0.05):
1990 r"""
1991 Test assumption of normal-distributed errors using Jarque-Bera-style
1992 omnibus :math:`\\chi^2` test.
1994 Parameters
1995 ----------
1996 signif : float
1997 The test's significance level.
1999 Returns
2000 -------
2001 result : :class:`statsmodels.tsa.vector_ar.hypothesis_test_results.NormalityTestResults`
2003 Notes
2004 -----
2005 |H0| : data are generated by a Gaussian-distributed process
2007 .. |H0| replace:: H\ :sub:`0`
2008 """
2009 return test_normality(self, signif=signif)
2011 def test_whiteness(self, nlags=10, signif=0.05, adjusted=False):
2012 """
2013 Test the whiteness of the residuals using the Portmanteau test.
2015 This test is described in [1]_, chapter 8.4.1.
2017 Parameters
2018 ----------
2019 nlags : int > 0
2020 signif : float, 0 < `signif` < 1
2021 adjusted : bool, default False
2023 Returns
2024 -------
2025 result : :class:`statsmodels.tsa.vector_ar.hypothesis_test_results.WhitenessTestResults`
2027 References
2028 ----------
2029 .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series Analysis*. Springer.
2030 """
2032 statistic = 0
2033 u = np.asarray(self.resid)
2034 acov_list = _compute_acov(u, nlags)
2035 # self.sigma_u instead of cov(0) is necessary to get the same
2036 # result as JMulTi. The difference between the two is that sigma_u is
2037 # calculated with the usual residuals while in cov(0) the
2038 # residuals are demeaned. To me JMulTi's behaviour seems a bit strange
2039 # because it uses the usual residuals here but demeaned residuals in
2040 # the calculation of autocovariances with lag > 0. (used in the
2041 # argument of trace() four rows below this comment.)
2042 c0_inv = inv(self.sigma_u) # instead of inv(cov(0))
2043 if c0_inv.dtype == np.complex128 and np.all(np.imag(c0_inv) == 0):
2044 c0_inv = np.real(c0_inv)
2045 for t in range(1, nlags+1):
2046 ct = acov_list[t]
2047 to_add = np.trace(ct.T @ c0_inv @ ct @ c0_inv)
2048 if adjusted:
2049 to_add /= (self.nobs - t)
2050 statistic += to_add
2051 statistic *= self.nobs**2 if adjusted else self.nobs
2053 df = self.neqs**2 * (nlags - self.k_ar + 1) - self.neqs*self.coint_rank
2054 dist = scipy.stats.chi2(df)
2055 pvalue = dist.sf(statistic)
2056 crit_value = dist.ppf(1 - signif)
2058 return WhitenessTestResults(statistic, crit_value, pvalue, df, signif,
2059 nlags, adjusted)
2061 def plot_data(self, with_presample=False):
2062 """
2063 Plot the input time series.
2065 Parameters
2066 ----------
2067 with_presample : bool, default: `False`
2068 If `False`, the pre-sample data (the first `k_ar` values) will
2069 not be plotted.
2070 """
2071 y = self.y_all if with_presample else self.y_all[:, self.k_ar:]
2072 names = self.names
2073 dates = self.dates if with_presample else self.dates[self.k_ar:]
2074 plot.plot_mts(y.T, names=names, index=dates)
2076 def summary(self, alpha=.05):
2077 """
2078 Return a summary of the estimation results.
2080 Parameters
2081 ----------
2082 alpha : float 0 < `alpha` < 1, default 0.05
2083 Significance level of the shown confidence intervals.
2085 Returns
2086 -------
2087 summary : :class:`statsmodels.iolib.summary.Summary`
2088 A summary containing information about estimated parameters.
2089 """
2090 from statsmodels.iolib.summary import summary_params
2092 summary = Summary()
2094 def make_table(self, params, std_err, t_values, p_values, conf_int,
2095 mask, names, title, strip_end=True):
2096 res = (self,
2097 params[mask],
2098 std_err[mask],
2099 t_values[mask],
2100 p_values[mask],
2101 conf_int[mask]
2102 )
2103 param_names = [
2104 '.'.join(name.split('.')[:-1]) if strip_end else name
2105 for name in np.array(names)[mask].tolist()]
2106 return summary_params(res, yname=None, xname=param_names,
2107 alpha=alpha, use_t=False, title=title)
2109 # ---------------------------------------------------------------------
2110 # Add tables with gamma and det_coef for each endogenous variable:
2111 lagged_params_components = []
2112 stderr_lagged_params_components = []
2113 tvalues_lagged_params_components = []
2114 pvalues_lagged_params_components = []
2115 conf_int_lagged_params_components = []
2116 if self.det_coef.size > 0:
2117 lagged_params_components.append(self.det_coef.flatten(order="F"))
2118 stderr_lagged_params_components.append(
2119 self.stderr_det_coef.flatten(order="F"))
2120 tvalues_lagged_params_components.append(
2121 self.tvalues_det_coef.flatten(order="F"))
2122 pvalues_lagged_params_components.append(
2123 self.pvalues_det_coef.flatten(order="F"))
2124 conf_int = self.conf_int_det_coef(alpha=alpha)
2125 lower = conf_int["lower"].flatten(order="F")
2126 upper = conf_int["upper"].flatten(order="F")
2127 conf_int_lagged_params_components.append(np.column_stack(
2128 (lower, upper)))
2129 if self.k_ar - 1 > 0:
2130 lagged_params_components.append(self.gamma.flatten())
2131 stderr_lagged_params_components.append(self.stderr_gamma.flatten())
2132 tvalues_lagged_params_components.append(
2133 self.tvalues_gamma.flatten())
2134 pvalues_lagged_params_components.append(
2135 self.pvalues_gamma.flatten())
2136 conf_int = self.conf_int_gamma(alpha=alpha)
2137 lower = conf_int["lower"].flatten()
2138 upper = conf_int["upper"].flatten()
2139 conf_int_lagged_params_components.append(np.column_stack(
2140 (lower, upper)))
2142 # if gamma or det_coef exists, then make a summary-table for them:
2143 if len(lagged_params_components) != 0:
2144 lagged_params = hstack(lagged_params_components)
2145 stderr_lagged_params = hstack(stderr_lagged_params_components)
2146 tvalues_lagged_params = hstack(tvalues_lagged_params_components)
2147 pvalues_lagged_params = hstack(pvalues_lagged_params_components)
2148 conf_int_lagged_params = vstack(conf_int_lagged_params_components)
2150 for i in range(self.neqs):
2151 masks = []
2152 offset = 0
2153 # 1. Deterministic terms outside cointegration relation
2154 if "co" in self.deterministic:
2155 masks.append(offset + np.array(i, ndmin=1))
2156 offset += self.neqs
2157 if self.seasons > 0:
2158 for _ in range(self.seasons-1):
2159 masks.append(offset + np.array(i, ndmin=1))
2160 offset += self.neqs
2161 if "lo" in self.deterministic:
2162 masks.append(offset + np.array(i, ndmin=1))
2163 offset += self.neqs
2164 if self.exog is not None:
2165 for _ in range(self.exog.shape[1]):
2166 masks.append(offset + np.array(i, ndmin=1))
2167 offset += self.neqs
2168 # 2. Lagged endogenous terms
2169 if self.k_ar - 1 > 0:
2170 start = i * self.neqs * (self.k_ar-1)
2171 end = (i+1) * self.neqs * (self.k_ar-1)
2172 masks.append(offset + np.arange(start, end))
2173 # offset += self.neqs**2 * (self.k_ar-1)
2175 # Create the table
2176 mask = np.concatenate(masks)
2177 eq_name = self.model.endog_names[i]
2178 title = "Det. terms outside the coint. relation " + \
2179 "& lagged endog. parameters for equation %s" % eq_name
2180 table = make_table(self, lagged_params, stderr_lagged_params,
2181 tvalues_lagged_params,
2182 pvalues_lagged_params,
2183 conf_int_lagged_params, mask,
2184 self.model._lagged_param_names, title)
2185 summary.tables.append(table)
2187 # ---------------------------------------------------------------------
2188 # Loading coefficients (alpha):
2189 a = self.alpha.flatten()
2190 se_a = self.stderr_alpha.flatten()
2191 t_a = self.tvalues_alpha.flatten()
2192 p_a = self.pvalues_alpha.flatten()
2193 ci_a = self.conf_int_alpha(alpha=alpha)
2194 lower = ci_a["lower"].flatten()
2195 upper = ci_a["upper"].flatten()
2196 ci_a = np.column_stack((lower, upper))
2197 a_names = self.model._load_coef_param_names
2198 alpha_masks = []
2199 for i in range(self.neqs):
2200 if self.coint_rank > 0:
2201 start = i * self.coint_rank
2202 end = start + self.coint_rank
2203 mask = np.arange(start, end)
2205 # Create the table
2206 alpha_masks.append(mask)
2208 eq_name = self.model.endog_names[i]
2209 title = "Loading coefficients (alpha) for equation %s" % eq_name
2210 table = make_table(self, a, se_a, t_a, p_a, ci_a, mask, a_names,
2211 title)
2212 summary.tables.append(table)
2214 # ---------------------------------------------------------------------
2215 # Cointegration matrix/vector (beta) and det. terms inside coint. rel.:
2216 coint_components = []
2217 stderr_coint_components = []
2218 tvalues_coint_components = []
2219 pvalues_coint_components = []
2220 conf_int_coint_components = []
2221 if self.coint_rank > 0:
2222 coint_components.append(self.beta.T.flatten())
2223 stderr_coint_components.append(self.stderr_beta.T.flatten())
2224 tvalues_coint_components.append(self.tvalues_beta.T.flatten())
2225 pvalues_coint_components.append(self.pvalues_beta.T.flatten())
2226 conf_int = self.conf_int_beta(alpha=alpha)
2227 lower = conf_int["lower"].T.flatten()
2228 upper = conf_int["upper"].T.flatten()
2229 conf_int_coint_components.append(np.column_stack(
2230 (lower, upper)))
2231 if self.det_coef_coint.size > 0:
2232 coint_components.append(self.det_coef_coint.flatten())
2233 stderr_coint_components.append(
2234 self.stderr_det_coef_coint.flatten())
2235 tvalues_coint_components.append(
2236 self.tvalues_det_coef_coint.flatten())
2237 pvalues_coint_components.append(
2238 self.pvalues_det_coef_coint.flatten())
2239 conf_int = self.conf_int_det_coef_coint(alpha=alpha)
2240 lower = conf_int["lower"].flatten()
2241 upper = conf_int["upper"].flatten()
2242 conf_int_coint_components.append(np.column_stack((lower, upper)))
2243 coint = hstack(coint_components)
2244 stderr_coint = hstack(stderr_coint_components)
2245 tvalues_coint = hstack(tvalues_coint_components)
2246 pvalues_coint = hstack(pvalues_coint_components)
2247 conf_int_coint = vstack(conf_int_coint_components)
2248 coint_names = self.model._coint_param_names
2250 for i in range(self.coint_rank):
2251 masks = []
2252 offset = 0
2254 # 1. Cointegration matrix (beta)
2255 if self.coint_rank > 0:
2256 start = i * self.neqs
2257 end = start + self.neqs
2258 masks.append(offset + np.arange(start, end))
2259 offset += self.neqs * self.coint_rank
2261 # 2. Deterministic terms inside cointegration relation
2262 if "ci" in self.deterministic:
2263 masks.append(offset + np.array(i, ndmin=1))
2264 offset += self.coint_rank
2265 if "li" in self.deterministic:
2266 masks.append(offset + np.array(i, ndmin=1))
2267 offset += self.coint_rank
2268 if self.exog_coint is not None:
2269 for _ in range(self.exog_coint.shape[1]):
2270 masks.append(offset + np.array(i, ndmin=1))
2271 offset += self.coint_rank
2273 # Create the table
2274 mask = np.concatenate(masks)
2275 title = "Cointegration relations for " + \
2276 "loading-coefficients-column %d" % (i+1)
2277 table = make_table(self, coint, stderr_coint, tvalues_coint,
2278 pvalues_coint, conf_int_coint, mask,
2279 coint_names, title)
2280 summary.tables.append(table)
2282 return summary