Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/tsa/arima/estimators/gls.py : 12%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""
Feasible generalized least squares for regression with SARIMA errors.

Author: Chad Fulton
License: BSD-3
"""
7import numpy as np
8import warnings
10from statsmodels.tools.tools import add_constant, Bunch
11from statsmodels.regression.linear_model import OLS
12from statsmodels.tsa.innovations import arma_innovations
13from statsmodels.tsa.statespace.tools import diff
15from statsmodels.tsa.arima.estimators.yule_walker import yule_walker
16from statsmodels.tsa.arima.estimators.burg import burg
17from statsmodels.tsa.arima.estimators.hannan_rissanen import hannan_rissanen
18from statsmodels.tsa.arima.estimators.innovations import (
19 innovations, innovations_mle)
20from statsmodels.tsa.arima.estimators.statespace import statespace
22from statsmodels.tsa.arima.specification import SARIMAXSpecification
23from statsmodels.tsa.arima.params import SARIMAXParams
def gls(endog, exog=None, order=(0, 0, 0), seasonal_order=(0, 0, 0, 0),
        include_constant=None, n_iter=None, max_iter=50, tolerance=1e-8,
        arma_estimator='innovations_mle', arma_estimator_kwargs=None):
    """
    Estimate ARMAX parameters by GLS.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    exog : array_like, optional
        Array of exogenous regressors. If not included, then `include_constant`
        must be True, and then `exog` will only include the constant column.
    order : tuple, optional
        The (p,d,q) order of the ARIMA model. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal ARIMA model.
        Default is (0, 0, 0, 0).
    include_constant : bool, optional
        Whether to add a constant term in `exog` if it's not already there.
        The estimate of the constant will then appear as one of the `exog`
        parameters. If `exog` is None, then the constant will represent the
        mean of the process. Default is True if the specified model does not
        include integration and False otherwise.
    n_iter : int, optional
        Optionally iterate feasible GLS a specific number of times. Default is
        to iterate to convergence. If set, this argument overrides the
        `max_iter` and `tolerance` arguments.
    max_iter : int, optional
        Maximum number of feasible GLS iterations. Default is 50. If `n_iter`
        is set, it overrides this argument.
    tolerance : float, optional
        Tolerance for determining convergence of feasible GLS iterations. If
        `n_iter` is set, this argument has no effect.
        Default is 1e-8.
    arma_estimator : str, optional
        The estimator used for estimating the ARMA model. This option should
        not generally be used, unless the default method is failing or is
        otherwise unsuitable. Not all values will be valid, depending on the
        specified model orders (`order` and `seasonal_order`). Possible values
        are:
        * 'innovations_mle' - can be used with any specification
        * 'statespace' - can be used with any specification
        * 'hannan_rissanen' - can be used with any ARMA non-seasonal model
        * 'yule_walker' - only non-seasonal consecutive
          autoregressive (AR) models
        * 'burg' - only non-seasonal, consecutive autoregressive (AR) models
        * 'innovations' - only non-seasonal, consecutive moving
          average (MA) models.
        The default is 'innovations_mle'.
    arma_estimator_kwargs : dict, optional
        Arguments to pass to the ARMA estimator.

    Returns
    -------
    parameters : SARIMAXParams object
        Contains the parameter estimates from the final iteration.
    other_results : Bunch
        Includes eight components: `spec`, `params`, `converged`,
        `differences`, `iterations`, `arma_estimator`,
        `arma_estimator_kwargs`, and `arma_results`.

    Raises
    ------
    ValueError
        If `include_constant` is True for a model that includes integration,
        or if there are no regressors at all (`exog` is None and no constant
        is included).

    Notes
    -----
    The primary reference is [1]_, section 6.6. In particular, the
    implementation follows the iterative procedure described in section 6.6.2.
    Construction of the transformed variables used to compute the GLS estimator
    described in section 6.6.1 is done via an application of the innovations
    algorithm (rather than explicit construction of the transformation matrix).

    Note that if the specified model includes integration, both the `endog` and
    `exog` series will be differenced prior to estimation and a warning will
    be issued to alert the user.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    # Handle n_iter: a fixed number of iterations replaces the convergence
    # criterion entirely
    if n_iter is not None:
        max_iter = n_iter
        tolerance = np.inf

    # Default for include_constant is True if there is no integration and
    # False otherwise
    integrated = order[1] > 0 or seasonal_order[1] > 0
    if include_constant is None:
        include_constant = not integrated
    elif include_constant and integrated:
        raise ValueError('Cannot include a constant in an integrated model.')

    # Handle including the constant (need to do it now so that the constant
    # parameter can be included in the specification as part of `exog`.)
    if include_constant:
        exog = np.ones_like(endog) if exog is None else add_constant(exog)
    elif exog is None:
        # Without any regressor there is nothing for GLS to estimate; fail
        # here with a clear message instead of an obscure error further down
        raise ValueError('No regressors to estimate by GLS: `exog` was not'
                         ' provided and no constant is included. Provide'
                         ' `exog` or (for models without integration) set'
                         ' `include_constant=True`.')

    # Create the SARIMAX specification
    spec = SARIMAXSpecification(endog, exog=exog, order=order,
                                seasonal_order=seasonal_order)
    endog = spec.endog
    exog = spec.exog

    # Handle integration
    if spec.is_integrated:
        # TODO: this is the approach suggested by BD (see Remark 1 in
        #       section 6.6.2 and Example 6.6.3), but maybe there are some
        #       cases where we don't want to force this behavior on the user?
        warnings.warn('Provided `endog` and `exog` series have been'
                      ' differenced to eliminate integration prior to GLS'
                      ' parameter estimation.')
        endog = diff(endog, k_diff=spec.diff,
                     k_seasonal_diff=spec.seasonal_diff,
                     seasonal_periods=spec.seasonal_periods)
        exog = diff(exog, k_diff=spec.diff,
                    k_seasonal_diff=spec.seasonal_diff,
                    seasonal_periods=spec.seasonal_periods)
    # Stack endog and exog so that both can be transformed in a single pass
    # of the innovations algorithm in Step 3
    augmented = np.c_[endog, exog]

    # Validate arma_estimator
    spec.validate_estimator(arma_estimator)
    if arma_estimator_kwargs is None:
        arma_estimator_kwargs = {}

    # Step 1: OLS
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    exog_params = res_ols.params
    resid = res_ols.resid

    # 0th iteration parameters
    p = SARIMAXParams(spec=spec)
    p.exog_params = exog_params
    if spec.max_ar_order > 0:
        p.ar_params = np.zeros(spec.k_ar_params)
    if spec.max_seasonal_ar_order > 0:
        p.seasonal_ar_params = np.zeros(spec.k_seasonal_ar_params)
    if spec.max_ma_order > 0:
        p.ma_params = np.zeros(spec.k_ma_params)
    if spec.max_seasonal_ma_order > 0:
        p.seasonal_ma_params = np.zeros(spec.k_seasonal_ma_params)
    p.sigma2 = res_ols.scale

    ar_params = p.ar_params
    seasonal_ar_params = p.seasonal_ar_params
    ma_params = p.ma_params
    seasonal_ma_params = p.seasonal_ma_params
    sigma2 = p.sigma2

    # Step 2 - 4: iterate feasible GLS to convergence
    # (index 0 of each list corresponds to the initial OLS step)
    arma_results = [None]
    differences = [None]
    parameters = [p]
    # `converged` is only meaningful when iterating to convergence
    converged = False if n_iter is None else None
    # Keep `i` defined even if the loop body never runs (max_iter == 0)
    i = 0
    for i in range(1, max_iter + 1):
        prev = exog_params

        # Step 2: ARMA
        # TODO: allow estimator-specific kwargs?
        if arma_estimator == 'yule_walker':
            p_arma, res_arma = yule_walker(
                resid, ar_order=spec.ar_order, demean=False,
                **arma_estimator_kwargs)
        elif arma_estimator == 'burg':
            p_arma, res_arma = burg(resid, ar_order=spec.ar_order,
                                    demean=False, **arma_estimator_kwargs)
        elif arma_estimator == 'innovations':
            out, res_arma = innovations(resid, ma_order=spec.ma_order,
                                        demean=False, **arma_estimator_kwargs)
            # `innovations` returns one parameter set per MA order; keep the
            # last one
            p_arma = out[-1]
        elif arma_estimator == 'hannan_rissanen':
            p_arma, res_arma = hannan_rissanen(
                resid, ar_order=spec.ar_order, ma_order=spec.ma_order,
                demean=False, **arma_estimator_kwargs)
        else:
            # For later iterations, use a "warm start" for parameter estimates
            # (speeds up estimation and convergence)
            start_params = (
                None if i == 1 else np.r_[ar_params, ma_params,
                                          seasonal_ar_params,
                                          seasonal_ma_params, sigma2])
            # Note: in each case, we do not pass in the order of integration
            # since we have already differenced the series
            tmp_order = (spec.order[0], 0, spec.order[2])
            tmp_seasonal_order = (spec.seasonal_order[0], 0,
                                  spec.seasonal_order[2],
                                  spec.seasonal_order[3])
            if arma_estimator == 'innovations_mle':
                p_arma, res_arma = innovations_mle(
                    resid, order=tmp_order, seasonal_order=tmp_seasonal_order,
                    demean=False, start_params=start_params,
                    **arma_estimator_kwargs)
            else:
                p_arma, res_arma = statespace(
                    resid, order=tmp_order, seasonal_order=tmp_seasonal_order,
                    include_constant=False, start_params=start_params,
                    **arma_estimator_kwargs)

        ar_params = p_arma.ar_params
        seasonal_ar_params = p_arma.seasonal_ar_params
        ma_params = p_arma.ma_params
        seasonal_ma_params = p_arma.seasonal_ma_params
        sigma2 = p_arma.sigma2
        arma_results.append(res_arma)

        # Step 3: GLS
        # Compute transformed variables that satisfy OLS assumptions
        # Note: In section 6.6.1 of Brockwell and Davis (2016), these
        # transformations are developed as computed by left multiplication
        # by a matrix T. However, explicitly constructing T and then
        # performing the left-multiplications does not scale well when nobs is
        # large. Instead, we can retrieve the transformed variables as the
        # residuals of the innovations algorithm (the `normalize=True`
        # argument applies a Prais-Winsten-type normalization to the first few
        # observations to ensure homoskedasticity). Brockwell and Davis
        # mention that they also take this approach in practice.
        #
        # The transformation has to be built from the complete ARMA
        # representation of the error process, so the coefficients are taken
        # from the reduced-form lag polynomials (the product of the seasonal
        # and non-seasonal polynomials, with explicit zeros at excluded lags)
        # rather than from the compact non-seasonal parameter vectors. The
        # AR polynomial stores negated coefficients, hence the sign flip.
        reduced_ar_params = -p_arma.reduced_ar_poly.coef[1:]
        reduced_ma_params = p_arma.reduced_ma_poly.coef[1:]
        tmp, _ = arma_innovations.arma_innovations(
            augmented, ar_params=reduced_ar_params,
            ma_params=reduced_ma_params, normalize=True)
        u = tmp[:, 0]
        x = tmp[:, 1:]

        # OLS on transformed variables
        mod_gls = OLS(u, x)
        res_gls = mod_gls.fit()
        exog_params = res_gls.params
        # Residuals are computed on the untransformed series; they feed the
        # ARMA step of the next iteration
        resid = endog - np.dot(exog, exog_params)

        # Construct the parameter vector for the iteration
        p = SARIMAXParams(spec=spec)
        p.exog_params = exog_params
        if spec.max_ar_order > 0:
            p.ar_params = ar_params
        if spec.max_seasonal_ar_order > 0:
            p.seasonal_ar_params = seasonal_ar_params
        if spec.max_ma_order > 0:
            p.ma_params = ma_params
        if spec.max_seasonal_ma_order > 0:
            p.seasonal_ma_params = seasonal_ma_params
        p.sigma2 = sigma2
        parameters.append(p)

        # Check for convergence (based on the regression coefficients only)
        difference = np.abs(exog_params - prev)
        differences.append(difference)
        if n_iter is None and np.all(difference < tolerance):
            converged = True
            break
    else:
        # The loop ran to completion without `break`, i.e. without converging
        if n_iter is None:
            warnings.warn('Feasible GLS failed to converge in %d iterations.'
                          ' Consider increasing the maximum number of'
                          ' iterations using the `max_iter` argument or'
                          ' reducing the required tolerance using the'
                          ' `tolerance` argument.' % max_iter)

    # Construct final results
    p = parameters[-1]
    other_results = Bunch({
        'spec': spec,
        'params': parameters,
        'converged': converged,
        'differences': differences,
        'iterations': i,
        'arma_estimator': arma_estimator,
        'arma_estimator_kwargs': arma_estimator_kwargs,
        'arma_results': arma_results,
    })

    return p, other_results