Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/tsa/statespace/varmax.py : 9%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3Vector Autoregressive Moving Average with eXogenous regressors model
5Author: Chad Fulton
6License: Simplified-BSD
7"""
9import contextlib
10from warnings import warn
11from collections import OrderedDict
13import pandas as pd
14import numpy as np
16from statsmodels.compat.pandas import Appender
17from statsmodels.tools.tools import Bunch
18from statsmodels.tools.data import _is_using_pandas
19from statsmodels.tsa.vector_ar import var_model
20import statsmodels.base.wrapper as wrap
21from statsmodels.tools.sm_exceptions import EstimationWarning
23from .kalman_filter import INVERT_UNIVARIATE, SOLVE_LU
24from .mlemodel import MLEModel, MLEResults, MLEResultsWrapper
25from .initialization import Initialization
26from .tools import (
27 is_invertible, concat, prepare_exog,
28 constrain_stationary_multivariate, unconstrain_stationary_multivariate,
29 prepare_trend_spec, prepare_trend_data
30)
33class VARMAX(MLEModel):
34 r"""
35 Vector Autoregressive Moving Average with eXogenous regressors model
37 Parameters
38 ----------
39 endog : array_like
40 The observed time-series process :math:`y`, shaped nobs x k_endog.
41 exog : array_like, optional
42 Array of exogenous regressors, shaped nobs x k.
43 order : iterable
44 The (p,q) order of the model for the number of AR and MA parameters to
45 use.
46 trend : str{'n','c','t','ct'} or iterable, optional
47 Parameter controlling the deterministic trend polynomial :math:`A(t)`.
48 Can be specified as a string where 'c' indicates a constant (i.e. a
49 degree zero component of the trend polynomial), 't' indicates a
50 linear trend with time, and 'ct' is both. Can also be specified as an
51 iterable defining the polynomial as in `numpy.poly1d`, where
52 `[1,1,0,1]` would denote :math:`a + bt + ct^3`. Default is a constant
53 trend component.
54 error_cov_type : {'diagonal', 'unstructured'}, optional
55 The structure of the covariance matrix of the error term, where
56 "unstructured" puts no restrictions on the matrix and "diagonal"
57 requires it to be a diagonal matrix (uncorrelated errors). Default is
58 "unstructured".
59 measurement_error : bool, optional
60 Whether or not to assume the endogenous observations `endog` were
61 measured with error. Default is False.
62 enforce_stationarity : bool, optional
63 Whether or not to transform the AR parameters to enforce stationarity
64 in the autoregressive component of the model. Default is True.
65 enforce_invertibility : bool, optional
66 Whether or not to transform the MA parameters to enforce invertibility
67 in the moving average component of the model. Default is True.
68 trend_offset : int, optional
69 The offset at which to start time trend values. Default is 1, so that
70 if `trend='t'` the trend is equal to 1, 2, ..., nobs. Typically is only
71 set when the model is created by extending a previous dataset.
72 **kwargs
73 Keyword arguments may be used to provide default values for state space
74 matrices or for Kalman filtering options. See `Representation`, and
75 `KalmanFilter` for more details.
77 Attributes
78 ----------
79 order : iterable
80 The (p,q) order of the model for the number of AR and MA parameters to
81 use.
82 trend : str{'n','c','t','ct'} or iterable
83 Parameter controlling the deterministic trend polynomial :math:`A(t)`.
84 Can be specified as a string where 'c' indicates a constant (i.e. a
85 degree zero component of the trend polynomial), 't' indicates a
86 linear trend with time, and 'ct' is both. Can also be specified as an
87 iterable defining the polynomial as in `numpy.poly1d`, where
88 `[1,1,0,1]` would denote :math:`a + bt + ct^3`.
89 error_cov_type : {'diagonal', 'unstructured'}, optional
90 The structure of the covariance matrix of the error term, where
91 "unstructured" puts no restrictions on the matrix and "diagonal"
92 requires it to be a diagonal matrix (uncorrelated errors). Default is
93 "unstructured".
94 measurement_error : bool, optional
95 Whether or not to assume the endogenous observations `endog` were
96 measured with error. Default is False.
97 enforce_stationarity : bool, optional
98 Whether or not to transform the AR parameters to enforce stationarity
99 in the autoregressive component of the model. Default is True.
100 enforce_invertibility : bool, optional
101 Whether or not to transform the MA parameters to enforce invertibility
102 in the moving average component of the model. Default is True.
104 Notes
105 -----
106 Generically, the VARMAX model is specified (see for example chapter 18 of
107 [1]_):
109 .. math::
111 y_t = A(t) + A_1 y_{t-1} + \dots + A_p y_{t-p} + B x_t + \epsilon_t +
112 M_1 \epsilon_{t-1} + \dots M_q \epsilon_{t-q}
114 where :math:`\epsilon_t \sim N(0, \Omega)`, and where :math:`y_t` is a
115 `k_endog x 1` vector. Additionally, this model allows considering the case
116 where the variables are measured with error.
118 Note that in the full VARMA(p,q) case there is a fundamental identification
119 problem in that the coefficient matrices :math:`\{A_i, M_j\}` are not
120 generally unique, meaning that for a given time series process there may
121 be multiple sets of matrices that equivalently represent it. See Chapter 12
122 of [1]_ for more information. Although this class can be used to estimate
123 VARMA(p,q) models, a warning is issued to remind users that no steps have
124 been taken to ensure identification in this case.
126 References
127 ----------
128 .. [1] Lütkepohl, Helmut. 2007.
129 New Introduction to Multiple Time Series Analysis.
130 Berlin: Springer.
131 """
def __init__(self, endog, exog=None, order=(1, 0), trend='c',
             error_cov_type='unstructured', measurement_error=False,
             enforce_stationarity=True, enforce_invertibility=True,
             trend_offset=1, **kwargs):
    """
    Build the state space form of a VARMAX(p,q) model.

    Stores and validates the model specification, initializes the
    underlying state space model, fills in the fixed elements of the
    system matrices, and caches the indices and slices that are later used
    to map a parameter vector into those matrices.
    """
    # Record the specification exactly as it was given
    self.error_cov_type = error_cov_type
    self.measurement_error = measurement_error
    self.enforce_stationarity = enforce_stationarity
    self.enforce_invertibility = enforce_invertibility
    self.order = order

    # Number of autoregressive / moving average lags
    self.k_ar = int(order[0])
    self.k_ma = int(order[1])

    # Reject specifications that cannot be represented
    if error_cov_type not in ('diagonal', 'unstructured'):
        raise ValueError('Invalid error covariance matrix type'
                         ' specification.')
    if not (self.k_ar or self.k_ma):
        raise ValueError('Invalid VARMAX(p,q) specification; at least one'
                         ' p,q must be greater than zero.')

    # Mixed VARMA models are not identified in general; remind the user
    if self.k_ar > 0 and self.k_ma > 0:
        warn('Estimation of VARMA(p,q) models is not generically robust,'
             ' due especially to identification issues.',
             EstimationWarning)

    # Deterministic trend specification
    self.trend = trend
    self.trend_offset = trend_offset
    self.polynomial_trend, self.k_trend = prepare_trend_spec(self.trend)
    self._trend_is_const = (self.polynomial_trend.size == 1 and
                            self.polynomial_trend[0] == 1)

    # Exogenous regressors
    self.k_exog, exog = prepare_exog(exog)

    # Regression coefficients are always estimated by MLE here (state
    # regression, as in SARIMAX, is not available).
    self.mle_regression = self.k_exog > 0

    # From here on `endog` must be an array or a pandas object
    if not _is_using_pandas(endog, None):
        endog = np.asanyarray(endog)

    # The AR block always occupies at least one lag in the state vector,
    # even for a pure VMA model.
    ar_blocks = max(self.k_ar, 1)
    self._k_order = ar_blocks + self.k_ma

    # Dimensions of the state space model
    k_endog = endog.shape[1]
    k_states = k_endog * self._k_order
    k_posdef = k_endog

    # Defaults: stationary initialization and LU-based inversion
    kwargs.setdefault('initialization', 'stationary')
    kwargs.setdefault('inversion_method', INVERT_UNIVARIATE | SOLVE_LU)

    # Initialize the state space model
    super(VARMAX, self).__init__(
        endog, exog=exog, k_states=k_states, k_posdef=k_posdef, **kwargs
    )

    # A time trend or exog makes the state intercept vary over time
    time_varying_intercept = (
        self.k_exog > 0 or
        (self.k_trend > 0 and not self._trend_is_const))
    if time_varying_intercept:
        self.ssm._time_invariant = False

    # Number of parameters in each group, in parameter-vector order
    n_endog = self.k_endog
    if self.error_cov_type == 'diagonal':
        n_state_cov = n_endog
    else:
        # 'unstructured': these parameters fill in a lower-triangular
        # matrix which is then dotted with itself to get a positive
        # definite matrix.
        n_state_cov = n_endog * (n_endog + 1) // 2
    self.parameters = OrderedDict([
        ('trend', n_endog * self.k_trend),
        ('ar', n_endog**2 * self.k_ar),
        ('ma', n_endog**2 * self.k_ma),
        ('regression', n_endog * self.k_exog),
        ('state_cov', n_state_cov),
        ('obs_cov', n_endog * self.measurement_error),
    ])
    self.k_params = sum(self.parameters.values())

    # Trend data is created with one more observation than we actually
    # have, which makes it easy to insert the appropriate trend component
    # into the final state intercept.
    extended_trend = prepare_trend_data(
        self.polynomial_trend, self.k_trend, self.nobs + 1,
        offset=self.trend_offset)
    self._trend_data = extended_trend[:-1]
    self._final_trend = extended_trend[-1:]

    # --- Known elements of the state space matrices ---

    # A time-varying state intercept needs one column per period
    if time_varying_intercept:
        self.ssm['state_intercept'] = np.zeros((self.k_states, self.nobs))

    # Design: identity on the first k_endog states
    rows, cols = np.diag_indices(self.k_endog)
    self.ssm['design', rows, cols] = 1

    # Transition, upper-left block: companion form for the AR coefficient
    # matrices (shaped k_endog * k_ar x k_endog * k_ar) ...
    if self.k_ar > 0:
        rows, cols = np.diag_indices((self.k_ar - 1) * self.k_endog)
        self.ssm['transition', rows + self.k_endog, cols] = 1
    # ... and lower-right block: companion form with zero coefficient
    # matrices (shaped k_endog * k_ma x k_endog * k_ma).
    rows, cols = np.diag_indices((self.k_ma - 1) * self.k_endog)
    self.ssm['transition',
             rows + (ar_blocks + 1) * self.k_endog,
             cols + ar_blocks * self.k_endog] = 1

    # Selection: all k_posdef errors enter the first k_endog rows of the
    # AR block and, when there are MA terms, the first k_endog rows of the
    # MA block as well.
    rows, cols = np.diag_indices(self.k_endog)
    self.ssm['selection', rows, cols] = 1
    if self.k_ma > 0:
        self.ssm['selection', rows + ar_blocks * self.k_endog, cols] = 1

    # --- Cached indices into the state space matrices ---
    if self._trend_is_const and self.k_exog == 0:
        self._idx_state_intercept = np.s_['state_intercept', :k_endog, :]
    elif self.k_trend > 0 or self.k_exog > 0:
        # Time-varying case: the final column is handled separately
        self._idx_state_intercept = np.s_['state_intercept', :k_endog, :-1]

    if self.k_ar > 0:
        self._idx_transition = np.s_['transition', :k_endog, :]
    else:
        self._idx_transition = np.s_['transition', :k_endog, k_endog:]

    if self.error_cov_type == 'diagonal':
        self._idx_state_cov = (
            ('state_cov',) + np.diag_indices(self.k_endog))
    elif self.error_cov_type == 'unstructured':
        self._idx_lower_state_cov = np.tril_indices(self.k_endog)

    if self.measurement_error:
        self._idx_obs_cov = ('obs_cov',) + np.diag_indices(self.k_endog)

    # --- Cached slices into the parameter vector ---
    # Creates self._params_trend, _params_ar, _params_ma,
    # _params_regression, _params_state_cov and _params_obs_cov.
    start = 0
    for group, length in self.parameters.items():
        stop = start + length
        setattr(self, '_params_' + group, np.s_[start:stop])
        start = stop

    # Optional final `exog` (self._final_trend was set earlier)
    self._final_exog = None

    # Extend the _init_keys attached by super
    self._init_keys += ['order', 'trend', 'error_cov_type',
                        'measurement_error', 'enforce_stationarity',
                        'enforce_invertibility'] + list(kwargs)
def clone(self, endog, exog=None, **kwargs):
    """
    Create a model for new data by delegating to `_clone_from_init_kwds`.

    Parameters
    ----------
    endog : array_like
        The observed time-series process for the new model.
    exog : array_like, optional
        Exogenous regressors for the new model.
    **kwargs
        Additional keyword arguments forwarded unchanged.

    Returns
    -------
    The object returned by `_clone_from_init_kwds`.
    """
    cloned = self._clone_from_init_kwds(endog, exog=exog, **kwargs)
    return cloned
@property
def _res_classes(self):
    """Results class and wrapper class used for the 'fit' key."""
    fit_classes = (VARMAXResults, VARMAXResultsWrapper)
    return dict(fit=fit_classes)
@property
def start_params(self):
    """
    Starting parameter values for estimation.

    The values are built in stages:

    A. trend and exog effects are removed from `endog` by least squares;
    B. a VAR is fitted to the result to get the AR coefficients and the
       error covariance;
    C. if there are MA terms, a second VAR is fitted to the residuals of
       step B to get the MA coefficients.

    AR (MA) starting values are replaced by zeros, with a warning, when
    they fail the stationarity (invertibility) check and the corresponding
    `enforce_*` option is set.

    Returns
    -------
    params : ndarray
        Array of length `k_params`, ordered as trend, AR, MA, regression,
        state covariance, observation covariance.
    """
    params = np.zeros(self.k_params, dtype=np.float64)
    k_endog = self.k_endog

    def _coefficient_matrices(flat, n_lags):
        # Reshape a flat coefficient vector into an array of n_lags
        # matrices, each k_endog x k_endog.
        return (
            flat.reshape(k_endog * n_lags, k_endog).T
        ).reshape(k_endog, k_endog, n_lags).T

    # A. Run a multivariate regression to get beta estimates
    endog = pd.DataFrame(self.endog.copy())
    endog = endog.interpolate()
    # `.bfill()` is the non-deprecated spelling of
    # `fillna(method='backfill')`
    endog = endog.bfill().values
    exog = None
    if self.k_trend > 0 and self.k_exog > 0:
        exog = np.c_[self._trend_data, self.exog]
    elif self.k_trend > 0:
        exog = self._trend_data
    elif self.k_exog > 0:
        exog = self.exog

    # Although the Kalman filter can deal with missing values in endog,
    # conditional sum of squares cannot
    if np.any(np.isnan(endog)):
        mask = ~np.any(np.isnan(endog), axis=1)
        endog = endog[mask]
        if exog is not None:
            exog = exog[mask]

    # Regression and trend effects via OLS
    trend_params = np.zeros(0)
    exog_params = np.zeros(0)
    if self.k_trend > 0 or self.k_exog > 0:
        trendexog_params = np.linalg.pinv(exog).dot(endog)
        # Not in-place: the array taken from the DataFrame may be
        # read-only.
        endog = endog - np.dot(exog, trendexog_params)
        if self.k_trend > 0:
            trend_params = trendexog_params[:self.k_trend].T
        # Regression coefficients only exist when there is exog
        if self.k_exog > 0:
            exog_params = trendexog_params[self.k_trend:].T

    # B. Run a VAR model on endog to get trend, AR parameters
    ar_params = []
    k_ar = self.k_ar if self.k_ar > 0 else 1
    mod_ar = var_model.VAR(endog)
    res_ar = mod_ar.fit(maxlags=k_ar, ic=None, trend='nc')
    if self.k_ar > 0:
        ar_params = np.array(res_ar.params).T.ravel()
    endog = res_ar.resid

    # Test for stationarity
    if self.k_ar > 0 and self.enforce_stationarity:
        coefficient_matrices = _coefficient_matrices(ar_params, self.k_ar)
        stationary = is_invertible([1] + list(-coefficient_matrices))

        if not stationary:
            warn('Non-stationary starting autoregressive parameters'
                 ' found. Using zeros as starting parameters.')
            ar_params *= 0

    # C. Run a VAR model on the residuals to get MA parameters
    ma_params = []
    if self.k_ma > 0:
        mod_ma = var_model.VAR(endog)
        res_ma = mod_ma.fit(maxlags=self.k_ma, ic=None, trend='nc')
        ma_params = np.array(res_ma.params.T).ravel()

        # Test for invertibility
        if self.enforce_invertibility:
            coefficient_matrices = _coefficient_matrices(
                ma_params, self.k_ma)
            invertible = is_invertible([1] + list(-coefficient_matrices))

            if not invertible:
                warn('Non-invertible starting moving-average parameters'
                     ' found. Using zeros as starting parameters.')
                ma_params *= 0

    # Transform trend / exog params from mean form to intercept form
    if self.k_ar > 0 and (self.k_trend > 0 or self.mle_regression):
        coefficient_matrices = _coefficient_matrices(ar_params, self.k_ar)
        tmp = np.eye(k_endog) - np.sum(coefficient_matrices, axis=0)

        if self.k_trend > 0:
            trend_params = np.dot(tmp, trend_params)
        if self.mle_regression:
            exog_params = np.dot(tmp, exog_params)

    # 1. Intercept terms
    if self.k_trend > 0:
        params[self._params_trend] = trend_params.ravel()

    # 2. AR terms
    if self.k_ar > 0:
        params[self._params_ar] = ar_params

    # 3. MA terms
    if self.k_ma > 0:
        params[self._params_ma] = ma_params

    # 4. Regression terms
    if self.mle_regression:
        params[self._params_regression] = exog_params.ravel()

    # 5. State covariance terms
    if self.error_cov_type == 'diagonal':
        params[self._params_state_cov] = res_ar.sigma_u.diagonal()
    elif self.error_cov_type == 'unstructured':
        # Parameters are the lower-triangular Cholesky factor
        cov_factor = np.linalg.cholesky(res_ar.sigma_u)
        params[self._params_state_cov] = (
            cov_factor[self._idx_lower_state_cov].ravel())

    # 6. Measurement error variance terms
    if self.measurement_error:
        if self.k_ma > 0:
            params[self._params_obs_cov] = res_ma.sigma_u.diagonal()
        else:
            params[self._params_obs_cov] = res_ar.sigma_u.diagonal()

    return params
@property
def param_names(self):
    """
    Human-readable names of the parameters.

    Returns
    -------
    list of str
        Names ordered as: trend terms, AR terms, MA terms, regression
        terms, state covariance terms, measurement error variances.
    """
    variables = self.endog_names
    if not isinstance(self.endog_names, list):
        variables = [variables]
    equations = range(self.k_endog)

    names = []

    # 1. Intercept terms
    if self.k_trend > 0:
        for power in self.polynomial_trend.nonzero()[0]:
            if power == 0:
                template = 'intercept.%s'
            elif power == 1:
                template = 'drift.%s'
            else:
                # Bake the polynomial degree in; leave the name open
                template = 'trend.%d.%%s' % power
            names.extend(template % variables[j] for j in equations)

    # 2. AR terms
    for eq in equations:
        for lag in range(1, self.k_ar + 1):
            for var in equations:
                names.append(
                    'L%d.%s.%s' % (lag, variables[var], variables[eq]))

    # 3. MA terms
    for eq in equations:
        for lag in range(1, self.k_ma + 1):
            for var in equations:
                names.append(
                    'L%d.e(%s).%s' % (lag, variables[var], variables[eq]))

    # 4. Regression terms
    for eq in equations:
        for reg in range(self.k_exog):
            names.append(
                'beta.%s.%s' % (self.exog_names[reg], variables[eq]))

    # 5. State covariance terms
    if self.error_cov_type == 'diagonal':
        names.extend('sigma2.%s' % variables[i] for i in equations)
    elif self.error_cov_type == 'unstructured':
        # Row-major walk over the lower triangle
        for row in equations:
            for col in range(row + 1):
                if row == col:
                    names.append('sqrt.var.%s' % variables[row])
                else:
                    names.append('sqrt.cov.%s.%s'
                                 % (variables[col], variables[row]))

    # 6. Measurement error variance terms
    if self.measurement_error:
        names.extend(
            'measurement_variance.%s' % variables[i] for i in equations)

    return names
def transform_params(self, unconstrained):
    """
    Map optimizer parameters to the parameters used by the likelihood.

    Parameters
    ----------
    unconstrained : array_like
        Array of unconstrained parameters used by the optimizer, to be
        transformed.

    Returns
    -------
    constrained : array_like
        Array of constrained parameters which may be used in likelihood
        evaluation.

    Notes
    -----
    AR / MA coefficients are optionally constrained to be stationary /
    invertible, and variances are squared so that they are non-negative.
    """
    raw = np.array(unconstrained, ndmin=1)
    result = np.zeros(raw.shape, dtype=raw.dtype)

    # 1. Intercept terms and 4. Regression terms: copied unchanged
    for passthrough in (self._params_trend, self._params_regression):
        result[passthrough] = raw[passthrough]

    # 2. AR terms: optionally force to be stationary
    if self.k_ar > 0 and self.enforce_stationarity:
        # The transformation needs the implied state covariance matrix
        if self.error_cov_type == 'diagonal':
            state_cov = np.diag(raw[self._params_state_cov]**2)
        elif self.error_cov_type == 'unstructured':
            lower = np.zeros(self.ssm['state_cov'].shape, dtype=raw.dtype)
            lower[self._idx_lower_state_cov] = raw[self._params_state_cov]
            state_cov = np.dot(lower, lower.T)

        ar_block = raw[self._params_ar].reshape(
            self.k_endog, self.k_endog * self.k_ar)
        ar_matrices, _ = constrain_stationary_multivariate(
            ar_block, state_cov)
        result[self._params_ar] = ar_matrices.ravel()
    else:
        result[self._params_ar] = raw[self._params_ar]

    # 3. MA terms: optionally force to be invertible
    if self.k_ma > 0 and self.enforce_invertibility:
        # An identity variance matrix is used for the MA transformation
        identity = np.eye(self.k_endog, dtype=raw.dtype)
        ma_block = raw[self._params_ma].reshape(
            self.k_endog, self.k_endog * self.k_ma)
        ma_matrices, _ = constrain_stationary_multivariate(
            ma_block, identity)
        result[self._params_ma] = ma_matrices.ravel()
    else:
        result[self._params_ma] = raw[self._params_ma]

    # 5. State covariance terms
    state_cov_values = raw[self._params_state_cov]
    if self.error_cov_type == 'diagonal':
        # Variances: force them to be positive
        result[self._params_state_cov] = state_cov_values**2
    elif self.error_cov_type == 'unstructured':
        # Lower-triangular factor: nothing needs to be done
        result[self._params_state_cov] = state_cov_values

    # 6. Measurement error variance terms: force to be positive
    if self.measurement_error:
        result[self._params_obs_cov] = raw[self._params_obs_cov]**2

    return result
def untransform_params(self, constrained):
    """
    Map likelihood parameters back to the parameters used by the optimizer.

    Parameters
    ----------
    constrained : array_like
        Array of constrained parameters used in likelihood evaluation, to
        be transformed.

    Returns
    -------
    unconstrained : array_like
        Array of unconstrained parameters used by the optimizer.
    """
    given = np.array(constrained, ndmin=1)
    result = np.zeros(given.shape, dtype=given.dtype)

    # 1. Intercept terms and 4. Regression terms: copied unchanged
    for passthrough in (self._params_trend, self._params_regression):
        result[passthrough] = given[passthrough]

    # 2. AR terms: optionally were forced to be stationary
    if self.k_ar > 0 and self.enforce_stationarity:
        # The transformation needs the state covariance matrix
        if self.error_cov_type == 'diagonal':
            state_cov = np.diag(given[self._params_state_cov])
        elif self.error_cov_type == 'unstructured':
            lower = np.zeros(self.ssm['state_cov'].shape,
                             dtype=given.dtype)
            lower[self._idx_lower_state_cov] = (
                given[self._params_state_cov])
            state_cov = np.dot(lower, lower.T)

        ar_block = given[self._params_ar].reshape(
            self.k_endog, self.k_endog * self.k_ar)
        ar_matrices, _ = unconstrain_stationary_multivariate(
            ar_block, state_cov)
        result[self._params_ar] = ar_matrices.ravel()
    else:
        result[self._params_ar] = given[self._params_ar]

    # 3. MA terms: optionally were forced to be invertible
    if self.k_ma > 0 and self.enforce_invertibility:
        # An identity variance matrix is used for the MA transformation
        identity = np.eye(self.k_endog, dtype=given.dtype)
        ma_block = given[self._params_ma].reshape(
            self.k_endog, self.k_endog * self.k_ma)
        ma_matrices, _ = unconstrain_stationary_multivariate(
            ma_block, identity)
        result[self._params_ma] = ma_matrices.ravel()
    else:
        result[self._params_ma] = given[self._params_ma]

    # 5. State covariance terms
    state_cov_values = given[self._params_state_cov]
    if self.error_cov_type == 'diagonal':
        # Variances were squared going in, so take the square root
        result[self._params_state_cov] = state_cov_values**0.5
    elif self.error_cov_type == 'unstructured':
        # Lower-triangular factor: nothing needs to be done
        result[self._params_state_cov] = state_cov_values

    # 6. Measurement error variance terms: these were squared going in
    if self.measurement_error:
        result[self._params_obs_cov] = given[self._params_obs_cov]**0.5

    return result
def _validate_can_fix_params(self, param_names):
    """
    Check that the requested set of fixed parameters is allowed.

    When stationarity (invertibility) is enforced and there is more than
    one AR (MA) coefficient, the coefficients are transformed jointly, so
    either all of them or none of them may be fixed.

    Parameters
    ----------
    param_names : set of str
        Names of the parameters to be fixed. Must support `issuperset` and
        `intersection`.

    Raises
    ------
    ValueError
        If only some of the AR (MA) parameters are fixed while
        `enforce_stationarity` (`enforce_invertibility`) is True.
    """
    super(VARMAX, self)._validate_can_fix_params(param_names)

    # Split the full list of names into the six parameter groups and keep
    # the AR and MA groups.
    ix = np.cumsum(list(self.parameters.values()))[:-1]
    (_, ar_names, ma_names, _, _, _) = [
        arr.tolist() for arr in np.array_split(self.param_names, ix)]

    if self.enforce_stationarity and self.k_ar > 0:
        if self.k_endog > 1 or self.k_ar > 1:
            fix_all = param_names.issuperset(ar_names)
            fix_any = (
                len(param_names.intersection(ar_names)) > 0)
            if fix_any and not fix_all:
                raise ValueError(
                    'Cannot fix individual autoregressive parameters'
                    ' when `enforce_stationarity=True`. In this case,'
                    ' must either fix all autoregressive parameters or'
                    ' none.')
    if self.enforce_invertibility and self.k_ma > 0:
        # Same guard as the AR case: only relevant with more than one
        # MA coefficient.
        if self.k_endog > 1 or self.k_ma > 1:
            fix_all = param_names.issuperset(ma_names)
            fix_any = (
                len(param_names.intersection(ma_names)) > 0)
            if fix_any and not fix_all:
                raise ValueError(
                    'Cannot fix individual moving average parameters'
                    ' when `enforce_invertibility=True`. In this case,'
                    ' must either fix all moving average parameters or'
                    ' none.')
def update(self, params, transformed=True, includes_fixed=False,
           complex_step=False):
    """
    Write a parameter vector into the state space matrices.

    Parameters
    ----------
    params : array_like
        Array of parameters; passed through `handle_params` first.
    transformed : bool, optional
        Forwarded to `handle_params`. Default is True.
    includes_fixed : bool, optional
        Forwarded to `handle_params`. Default is False.
    complex_step : bool, optional
        Accepted for interface compatibility; not used in this method.

    Notes
    -----
    When the state intercept is time-varying (exog and / or a non-constant
    trend), its final column is the intercept for the first out-of-sample
    period. That column is built from `_final_exog` and `_final_trend`,
    and is set to NaN when exog is in the model but no final exog is
    available.
    """
    params = self.handle_params(params, transformed=transformed,
                                includes_fixed=includes_fixed)

    # 1. State intercept
    # - Exog
    if self.mle_regression:
        exog_params = params[self._params_regression].reshape(
            self.k_endog, self.k_exog).T
        intercept = np.dot(self.exog[1:], exog_params)
        self.ssm[self._idx_state_intercept] = intercept.T

        if self._final_exog is not None:
            self.ssm['state_intercept', :self.k_endog, -1] = np.dot(
                self._final_exog, exog_params)

    # - Trend
    if self.k_trend > 0:
        # The intercept has a separate final column exactly when it is
        # time-varying (same condition as used when it was allocated).
        time_varying = self.k_exog > 0 or not self._trend_is_const

        # If we did not set the intercept above, zero it out so we can
        # just += later. Every column is zeroed, including the final one;
        # otherwise the final trend would accumulate across calls.
        if not self.mle_regression:
            zero = np.array(0, dtype=params.dtype)
            self.ssm['state_intercept', :self.k_endog, :] = zero

        trend_params = params[self._params_trend].reshape(
            self.k_endog, self.k_trend).T
        if self._trend_is_const:
            intercept = trend_params
        else:
            intercept = np.dot(self._trend_data[1:], trend_params)
        self.ssm[self._idx_state_intercept] += intercept.T

        # Add the trend to the final column whenever that column is not
        # covered by the cached index, including a constant trend
        # combined with exog.
        if self._final_trend is not None and time_varying:
            self.ssm['state_intercept', :self.k_endog, -1:] += np.dot(
                self._final_trend, trend_params).T

    # Need to set the last state intercept to np.nan (with appropriate
    # dtype) if we don't have the final exog
    if self.mle_regression and self._final_exog is None:
        nan = np.array(np.nan, dtype=params.dtype)
        self.ssm['state_intercept', :self.k_endog, -1] = nan

    # 2. Transition
    ar = params[self._params_ar].reshape(
        self.k_endog, self.k_endog * self.k_ar)
    ma = params[self._params_ma].reshape(
        self.k_endog, self.k_endog * self.k_ma)
    self.ssm[self._idx_transition] = np.c_[ar, ma]

    # 3. State covariance
    if self.error_cov_type == 'diagonal':
        self.ssm[self._idx_state_cov] = (
            params[self._params_state_cov]
        )
    elif self.error_cov_type == 'unstructured':
        # Parameters are a lower-triangular factor of the covariance
        state_cov_lower = np.zeros(self.ssm['state_cov'].shape,
                                   dtype=params.dtype)
        state_cov_lower[self._idx_lower_state_cov] = (
            params[self._params_state_cov])
        self.ssm['state_cov'] = np.dot(state_cov_lower, state_cov_lower.T)

    # 4. Observation covariance
    if self.measurement_error:
        self.ssm[self._idx_obs_cov] = params[self._params_obs_cov]
770 @contextlib.contextmanager
771 def _set_final_exog(self, exog):
772 """
773 Set the final state intercept value using out-of-sample `exog` / trend
775 Parameters
776 ----------
777 exog : ndarray
778 Out-of-sample `exog` values, usually produced by
779 `_validate_out_of_sample_exog` to ensure the correct shape (this
780 method does not do any additional validation of its own).
781 out_of_sample : int
782 Number of out-of-sample periods.
784 Notes
785 -----
786 We need special handling for simulating or forecasting with `exog` or
787 trend, because if we had these then the last predicted_state has been
788 set to NaN since we did not have the appropriate `exog` to create it.
789 Since we handle trend in the same way as `exog`, we still have this
790 issue when only trend is used without `exog`.
791 """
792 cache_value = self._final_exog
793 if self.k_exog > 0:
794 if exog is not None:
795 exog = np.atleast_1d(exog)
796 if exog.ndim == 2:
797 exog = exog[:1]
798 try:
799 exog = np.reshape(exog[:1], (self.k_exog,))
800 except ValueError:
801 raise ValueError('Provided exogenous values are not of the'
802 ' appropriate shape. Required %s, got %s.'
803 % (str((self.k_exog,)),
804 str(exog.shape)))
805 self._final_exog = exog
806 try:
807 yield
808 finally:
809 self._final_exog = cache_value
@Appender(MLEModel.simulate.__doc__)
def simulate(self, params, nsimulations, measurement_shocks=None,
             state_shocks=None, initial_state=None, anchor=None,
             repetitions=None, exog=None, extend_model=None,
             extend_kwargs=None, transformed=True, includes_fixed=False,
             **kwargs):
    # No docstring here on purpose: the decorator supplies the parent's.
    forwarded = dict(
        measurement_shocks=measurement_shocks,
        state_shocks=state_shocks,
        initial_state=initial_state,
        anchor=anchor,
        repetitions=repetitions,
        exog=exog,
        extend_model=extend_model,
        extend_kwargs=extend_kwargs,
        transformed=transformed,
        includes_fixed=includes_fixed,
    )
    forwarded.update(kwargs)

    # Make the out-of-sample `exog` available while the parent class runs
    # the simulation; the previous value is restored on exit.
    with self._set_final_exog(exog):
        return super(VARMAX, self).simulate(
            params, nsimulations, **forwarded)
828class VARMAXResults(MLEResults):
829 """
830 Class to hold results from fitting a VARMAX model.
832 Parameters
833 ----------
834 model : VARMAX instance
835 The fitted model instance
837 Attributes
838 ----------
839 specification : dictionary
840 Dictionary including all attributes from the VARMAX model instance.
841 coefficient_matrices_var : ndarray
842 Array containing autoregressive lag polynomial coefficient matrices,
843 ordered from lowest degree to highest.
844 coefficient_matrices_vma : ndarray
845 Array containing moving average lag polynomial coefficients,
846 ordered from lowest degree to highest.
848 See Also
849 --------
850 statsmodels.tsa.statespace.kalman_filter.FilterResults
851 statsmodels.tsa.statespace.mlemodel.MLEResults
852 """
def __init__(self, model, params, filter_results, cov_type=None,
             cov_kwds=None, **kwargs):
    """
    Store the model specification and the coefficient matrices.

    All arguments are forwarded to the parent results class. Afterwards
    `specification` is filled from the model's attributes, and
    `coefficient_matrices_var` / `coefficient_matrices_vma` are set from
    the AR / MA parameters (or left as None when the model has no such
    terms).
    """
    super(VARMAXResults, self).__init__(model, params, filter_results,
                                        cov_type, cov_kwds, **kwargs)

    mod = self.model
    self.specification = Bunch(
        # Additional model parameters
        error_cov_type=mod.error_cov_type,
        measurement_error=mod.measurement_error,
        enforce_stationarity=mod.enforce_stationarity,
        enforce_invertibility=mod.enforce_invertibility,
        trend_offset=mod.trend_offset,
        order=mod.order,
        # Model order
        k_ar=mod.k_ar,
        k_ma=mod.k_ma,
        # Trend / Regression
        trend=mod.trend,
        k_trend=mod.k_trend,
        k_exog=mod.k_exog,
    )

    def _lag_matrices(param_slice, n_lags):
        # Reshape the flat parameters of one block into an array of
        # n_lags matrices, each k_endog x k_endog; None if no lags.
        if not n_lags > 0:
            return None
        flat = np.array(self.params[param_slice])
        k_endog = self.model.k_endog
        return (
            flat.reshape(k_endog * n_lags, k_endog).T
        ).reshape(k_endog, k_endog, n_lags).T

    # Polynomials / coefficient matrices
    self.coefficient_matrices_var = _lag_matrices(
        mod._params_ar, mod.k_ar)
    self.coefficient_matrices_vma = _lag_matrices(
        mod._params_ma, mod.k_ma)
896 def extend(self, endog, exog=None, **kwargs):
897 # If we have exog, then the last element of predicted_state and
898 # predicted_state_cov are nan (since they depend on the exog associated
899 # with the first out-of-sample point), so we need to compute them here
900 if exog is not None:
901 fcast = self.get_prediction(self.nobs, self.nobs, exog=exog[:1])
902 fcast_results = fcast.prediction_results
903 initial_state = fcast_results.predicted_state[..., 0]
904 initial_state_cov = fcast_results.predicted_state_cov[..., 0]
905 else:
906 initial_state = self.predicted_state[..., -1]
907 initial_state_cov = self.predicted_state_cov[..., -1]
909 kwargs.setdefault('trend_offset', self.nobs + self.model.trend_offset)
910 mod = self.model.clone(endog, exog=exog, **kwargs)
912 mod.ssm.initialization = Initialization(
913 mod.k_states, 'known', constant=initial_state,
914 stationary_cov=initial_state_cov)
916 if self.smoother_results is not None:
917 res = mod.smooth(self.params)
918 else:
919 res = mod.filter(self.params)
921 return res
923 @contextlib.contextmanager
924 def _set_final_predicted_state(self, exog, out_of_sample):
925 """
926 Set the final predicted state value using out-of-sample `exog` / trend
928 Parameters
929 ----------
930 exog : ndarray
931 Out-of-sample `exog` values, usually produced by
932 `_validate_out_of_sample_exog` to ensure the correct shape (this
933 method does not do any additional validation of its own).
934 out_of_sample : int
935 Number of out-of-sample periods.
937 Notes
938 -----
939 We need special handling for forecasting with `exog` or trend, because
940 if we had these then the last predicted_state has been set to NaN since
941 we did not have the appropriate `exog` to create it. Since we handle
942 trend in the same way as `exog`, we still have this issue when only
943 trend is used without `exog`.
944 """
945 flag = out_of_sample and (
946 self.model.k_exog > 0 or self.model.k_trend > 0)
948 if flag:
949 tmp_endog = concat([
950 self.model.endog[-1:], np.zeros((1, self.model.k_endog))])
951 if self.model.k_exog > 0:
952 tmp_exog = concat([self.model.exog[-1:], exog[:1]])
953 else:
954 tmp_exog = None
956 tmp_trend_offset = self.model.trend_offset + self.nobs - 1
957 tmp_mod = self.model.clone(tmp_endog, exog=tmp_exog,
958 trend_offset=tmp_trend_offset)
959 constant = self.filter_results.predicted_state[:, -2]
960 stationary_cov = self.filter_results.predicted_state_cov[:, :, -2]
961 tmp_mod.ssm.initialize_known(constant=constant,
962 stationary_cov=stationary_cov)
963 tmp_res = tmp_mod.filter(self.params, transformed=True,
964 includes_fixed=True, return_ssm=True)
966 # Patch up `predicted_state`
967 self.filter_results.predicted_state[:, -1] = (
968 tmp_res.predicted_state[:, -2])
969 try:
970 yield
971 finally:
972 if flag:
973 self.filter_results.predicted_state[:, -1] = np.nan
975 @Appender(MLEResults.get_prediction.__doc__)
976 def get_prediction(self, start=None, end=None, dynamic=False, index=None,
977 exog=None, **kwargs):
978 if start is None:
979 start = 0
981 # Handle end (e.g. date)
982 _start, _end, out_of_sample, _ = (
983 self.model._get_prediction_index(start, end, index, silent=True))
985 # Normalize `exog`
986 exog = self.model._validate_out_of_sample_exog(exog, out_of_sample)
988 # Handle trend offset for extended model
989 extend_kwargs = {}
990 if self.model.k_trend > 0:
991 extend_kwargs['trend_offset'] = (
992 self.model.trend_offset + self.nobs)
994 # Get the prediction
995 with self.model._set_final_exog(exog):
996 with self._set_final_predicted_state(exog, out_of_sample):
997 out = super(VARMAXResults, self).get_prediction(
998 start=start, end=end, dynamic=dynamic, index=index,
999 exog=exog, extend_kwargs=extend_kwargs, **kwargs)
1000 return out
1002 @Appender(MLEResults.simulate.__doc__)
1003 def simulate(self, nsimulations, measurement_shocks=None,
1004 state_shocks=None, initial_state=None, anchor=None,
1005 repetitions=None, exog=None, extend_model=None,
1006 extend_kwargs=None, **kwargs):
1007 if anchor is None or anchor == 'start':
1008 iloc = 0
1009 elif anchor == 'end':
1010 iloc = self.nobs
1011 else:
1012 iloc, _, _ = self.model._get_index_loc(anchor)
1014 if iloc < 0:
1015 iloc = self.nobs + iloc
1016 if iloc > self.nobs:
1017 raise ValueError('Cannot anchor simulation after the estimated'
1018 ' sample.')
1020 out_of_sample = max(iloc + nsimulations - self.nobs, 0)
1022 # Normalize `exog`
1023 exog = self.model._validate_out_of_sample_exog(exog, out_of_sample)
1025 with self._set_final_predicted_state(exog, out_of_sample):
1026 out = super(VARMAXResults, self).simulate(
1027 nsimulations, measurement_shocks=measurement_shocks,
1028 state_shocks=state_shocks, initial_state=initial_state,
1029 anchor=anchor, repetitions=repetitions, exog=exog,
1030 extend_model=extend_model, extend_kwargs=extend_kwargs,
1031 **kwargs)
1033 return out
1035 @Appender(MLEResults.summary.__doc__)
1036 def summary(self, alpha=.05, start=None, separate_params=True):
1037 from statsmodels.iolib.summary import summary_params
1039 # Create the model name
1040 spec = self.specification
1041 if spec.k_ar > 0 and spec.k_ma > 0:
1042 model_name = 'VARMA'
1043 order = '(%s,%s)' % (spec.k_ar, spec.k_ma)
1044 elif spec.k_ar > 0:
1045 model_name = 'VAR'
1046 order = '(%s)' % (spec.k_ar)
1047 else:
1048 model_name = 'VMA'
1049 order = '(%s)' % (spec.k_ma)
1050 if spec.k_exog > 0:
1051 model_name += 'X'
1052 model_name = [model_name + order]
1054 if spec.k_trend > 0:
1055 model_name.append('intercept')
1057 if spec.measurement_error:
1058 model_name.append('measurement error')
1060 summary = super(VARMAXResults, self).summary(
1061 alpha=alpha, start=start, model_name=model_name,
1062 display_params=not separate_params
1063 )
1065 if separate_params:
1066 indices = np.arange(len(self.params))
1068 def make_table(self, mask, title, strip_end=True):
1069 res = (self, self.params[mask], self.bse[mask],
1070 self.zvalues[mask], self.pvalues[mask],
1071 self.conf_int(alpha)[mask])
1073 param_names = []
1074 for name in np.array(self.data.param_names)[mask].tolist():
1075 if strip_end:
1076 param_name = '.'.join(name.split('.')[:-1])
1077 else:
1078 param_name = name
1079 if name in self.fixed_params:
1080 param_name = '%s (fixed)' % param_name
1081 param_names.append(param_name)
1083 return summary_params(res, yname=None, xname=param_names,
1084 alpha=alpha, use_t=False, title=title)
1086 # Add parameter tables for each endogenous variable
1087 k_endog = self.model.k_endog
1088 k_ar = self.model.k_ar
1089 k_ma = self.model.k_ma
1090 k_trend = self.model.k_trend
1091 k_exog = self.model.k_exog
1092 endog_masks = []
1093 for i in range(k_endog):
1094 masks = []
1095 offset = 0
1097 # 1. Intercept terms
1098 if k_trend > 0:
1099 masks.append(np.arange(i, i + k_endog * k_trend, k_endog))
1100 offset += k_endog * k_trend
1102 # 2. AR terms
1103 if k_ar > 0:
1104 start = i * k_endog * k_ar
1105 end = (i + 1) * k_endog * k_ar
1106 masks.append(
1107 offset + np.arange(start, end))
1108 offset += k_ar * k_endog**2
1110 # 3. MA terms
1111 if k_ma > 0:
1112 start = i * k_endog * k_ma
1113 end = (i + 1) * k_endog * k_ma
1114 masks.append(
1115 offset + np.arange(start, end))
1116 offset += k_ma * k_endog**2
1118 # 4. Regression terms
1119 if k_exog > 0:
1120 masks.append(
1121 offset + np.arange(i * k_exog, (i + 1) * k_exog))
1122 offset += k_endog * k_exog
1124 # 5. Measurement error variance terms
1125 if self.model.measurement_error:
1126 masks.append(
1127 np.array(self.model.k_params - i - 1, ndmin=1))
1129 # Create the table
1130 mask = np.concatenate(masks)
1131 endog_masks.append(mask)
1133 endog_names = self.model.endog_names
1134 if not isinstance(endog_names, list):
1135 endog_names = [endog_names]
1136 title = "Results for equation %s" % endog_names[i]
1137 table = make_table(self, mask, title)
1138 summary.tables.append(table)
1140 # State covariance terms
1141 state_cov_mask = (
1142 np.arange(len(self.params))[self.model._params_state_cov])
1143 table = make_table(self, state_cov_mask, "Error covariance matrix",
1144 strip_end=False)
1145 summary.tables.append(table)
1147 # Add a table for all other parameters
1148 masks = []
1149 for m in (endog_masks, [state_cov_mask]):
1150 m = np.array(m).flatten()
1151 if len(m) > 0:
1152 masks.append(m)
1153 masks = np.concatenate(masks)
1154 inverse_mask = np.array(list(set(indices).difference(set(masks))))
1155 if len(inverse_mask) > 0:
1156 table = make_table(self, inverse_mask, "Other parameters",
1157 strip_end=False)
1158 summary.tables.append(table)
1160 return summary
class VARMAXResultsWrapper(MLEResultsWrapper):
    # The VARMAX-specific dicts are empty; each is combined with the
    # corresponding dict on `MLEResultsWrapper` via `wrap.union_dicts`.
    _attrs = {}
    _methods = {}
    _wrap_attrs = wrap.union_dicts(MLEResultsWrapper._wrap_attrs, _attrs)
    _wrap_methods = wrap.union_dicts(MLEResultsWrapper._wrap_methods,
                                     _methods)


# Associate the wrapper class with the results class.
wrap.populate_wrapper(VARMAXResultsWrapper, VARMAXResults)  # noqa:E305