Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/tsa/arima/estimators/hannan_rissanen.py : 11%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Hannan-Rissanen procedure for estimating ARMA(p,q) model parameters.
4Author: Chad Fulton
5License: BSD-3
6"""
7import numpy as np
9from scipy.signal import lfilter
10from statsmodels.tools.tools import Bunch
11from statsmodels.regression.linear_model import OLS, yule_walker
12from statsmodels.tsa.tsatools import lagmat
14from statsmodels.tsa.arima.specification import SARIMAXSpecification
15from statsmodels.tsa.arima.params import SARIMAXParams
18def hannan_rissanen(endog, ar_order=0, ma_order=0, demean=True,
19 initial_ar_order=None, unbiased=None):
20 """
21 Estimate ARMA parameters using Hannan-Rissanen procedure.
23 Parameters
24 ----------
25 endog : array_like
26 Input time series array, assumed to be stationary.
27 ar_order : int
28 Autoregressive order
29 ma_order : int
30 Moving average order
31 demean : bool, optional
32 Whether to estimate and remove the mean from the process prior to
33 fitting the ARMA coefficients. Default is True.
34 initial_ar_order : int, optional
35 Order of long autoregressive process used for initial computation of
36 residuals.
37 unbiased: bool, optional
38 Whether or not to apply the bias correction step. Default is True if
39 the estimated coefficients from the previous step imply a stationary
40 and invertible process and False otherwise.
42 Returns
43 -------
44 parameters : SARIMAXParams object
45 other_results : Bunch
46 Includes three components: `spec`, containing the
47 `SARIMAXSpecification` instance corresponding to the input arguments;
48 `initial_ar_order`, containing the autoregressive lag order used in the
49 first step; and `resid`, which contains the computed residuals from the
50 last step.
52 Notes
53 -----
54 The primary reference is [1]_, section 5.1.4, which describes a three-step
55 procedure that we implement here.
57 1. Fit a large-order AR model via Yule-Walker to estimate residuals
58 2. Compute AR and MA estimates via least squares
59 3. (Unless the estimated coefficients from step (2) are non-stationary /
60 non-invertible or `unbiased=False`) Perform bias correction
62 The order used for the AR model in the first step may be given as an
63 argument. If it is not, we compute it as suggested by [2]_.
65 The estimate of the variance that we use is computed from the residuals
66 of the least-squares regression and not from the innovations algorithm.
67 This is because our fast implementation of the innovations algorithm is
68 only valid for stationary processes, and the Hannan-Rissanen procedure may
69 produce estimates that imply non-stationary processes. To avoid
70 inconsistency, we never compute this latter variance here, even if it is
71 possible. See test_hannan_rissanen::test_brockwell_davis_example_517 for
72 an example of how to compute this variance manually.
74 This procedure assumes that the series is stationary, but if this is not
75 true, it is still possible that this procedure will return parameters that
76 imply a non-stationary / non-invertible process.
78 Note that the third stage will only be applied if the parameters from the
79 second stage imply a stationary / invertible model. If `unbiased=True` is
80 given, then non-stationary / non-invertible parameters in the second stage
81 will throw an exception.
83 References
84 ----------
85 .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
86 Introduction to Time Series and Forecasting. Springer.
87 .. [2] Gomez, Victor, and Agustin Maravall. 2001.
88 "Automatic Modeling Methods for Univariate Series."
89 A Course in Time Series Analysis, 171–201.
90 """
91 spec = SARIMAXSpecification(endog, ar_order=ar_order, ma_order=ma_order)
92 endog = spec.endog
93 if demean:
94 endog = endog - endog.mean()
96 p = SARIMAXParams(spec=spec)
98 nobs = len(endog)
99 max_ar_order = spec.max_ar_order
100 max_ma_order = spec.max_ma_order
102 # Default initial_ar_order is as suggested by Gomez and Maravall (2001)
103 if initial_ar_order is None:
104 initial_ar_order = max(np.floor(np.log(nobs)**2).astype(int),
105 2 * max(max_ar_order, max_ma_order))
106 # Create a spec, just to validate the initial autoregressive order
107 _ = SARIMAXSpecification(endog, ar_order=initial_ar_order)
109 # Compute lagged endog
110 # (`ar_ix`, and `ma_ix` below, are to account for non-consecutive lags;
111 # for indexing purposes, must have dtype int)
112 ar_ix = np.array(spec.ar_lags, dtype=int) - 1
113 lagged_endog = lagmat(endog, max_ar_order, trim='both')[:, ar_ix]
115 # If no AR or MA components, this is just a variance computation
116 if max_ma_order == 0 and max_ar_order == 0:
117 p.sigma2 = np.var(endog, ddof=0)
118 resid = endog.copy()
119 # If no MA component, this is just CSS
120 elif max_ma_order == 0:
121 mod = OLS(endog[max_ar_order:], lagged_endog)
122 res = mod.fit()
123 resid = res.resid
124 p.ar_params = res.params
125 p.sigma2 = res.scale
126 # Otherwise ARMA model
127 else:
128 # Step 1: Compute long AR model via Yule-Walker, get residuals
129 initial_ar_params, _ = yule_walker(
130 endog, order=initial_ar_order, method='mle')
131 X = lagmat(endog, initial_ar_order, trim='both')
132 y = endog[initial_ar_order:]
133 resid = y - X.dot(initial_ar_params)
135 # Get lagged residuals for `exog` in least-squares regression
136 ma_ix = np.array(spec.ma_lags, dtype=int) - 1
137 lagged_resid = lagmat(resid, max_ma_order, trim='both')[:, ma_ix]
139 # Step 2: estimate ARMA model via least squares
140 ix = initial_ar_order + max_ma_order - max_ar_order
141 mod = OLS(endog[initial_ar_order + max_ma_order:],
142 np.c_[lagged_endog[ix:], lagged_resid])
143 res = mod.fit()
144 p.ar_params = res.params[:spec.k_ar_params]
145 p.ma_params = res.params[spec.k_ar_params:]
146 resid = res.resid
147 p.sigma2 = res.scale
149 # Step 3: bias correction (if requested)
150 if unbiased is True or unbiased is None:
151 if p.is_stationary and p.is_invertible:
152 Z = np.zeros_like(endog)
153 V = np.zeros_like(endog)
154 W = np.zeros_like(endog)
156 ar_coef = p.ar_poly.coef
157 ma_coef = p.ma_poly.coef
159 for t in range(nobs):
160 if t >= max(max_ar_order, max_ma_order):
161 # Note: in the case of non-consecutive lag orders, the
162 # polynomials have the appropriate zeros so we don't
163 # need to subset `endog[t - max_ar_order:t]` or
164 # Z[t - max_ma_order:t]
165 tmp_ar = np.dot(
166 -ar_coef[1:], endog[t - max_ar_order:t][::-1])
167 tmp_ma = np.dot(ma_coef[1:],
168 Z[t - max_ma_order:t][::-1])
169 Z[t] = endog[t] - tmp_ar - tmp_ma
171 V = lfilter([1], ar_coef, Z)
172 W = lfilter(np.r_[1, -ma_coef[1:]], [1], Z)
174 lagged_V = lagmat(V, max_ar_order, trim='both')
175 lagged_W = lagmat(W, max_ma_order, trim='both')
177 exog = np.c_[
178 lagged_V[max(max_ma_order - max_ar_order, 0):, ar_ix],
179 lagged_W[max(max_ar_order - max_ma_order, 0):, ma_ix]]
181 mod_unbias = OLS(Z[max(max_ar_order, max_ma_order):], exog)
182 res_unbias = mod_unbias.fit()
184 p.ar_params = (
185 p.ar_params + res_unbias.params[:spec.k_ar_params])
186 p.ma_params = (
187 p.ma_params + res_unbias.params[spec.k_ar_params:])
189 # Recompute sigma2
190 resid = mod.endog - mod.exog.dot(
191 np.r_[p.ar_params, p.ma_params])
192 p.sigma2 = np.inner(resid, resid) / len(resid)
193 elif unbiased is True:
194 raise ValueError('Cannot perform third step of Hannan-Rissanen'
195 ' estimation to remove paramater bias,'
196 ' because parameters estimated from the'
197 ' second step are non-stationary or'
198 ' non-invertible')
200 # TODO: Gomez and Maravall (2001) or Gomez (1998)
201 # propose one more step here to further improve MA estimates
203 # Construct results
204 other_results = Bunch({
205 'spec': spec,
206 'initial_ar_order': initial_ar_order,
207 'resid': resid
208 })
210 return p, other_results