Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Feasible generalized least squares for regression with SARIMA errors. 

3 

4Author: Chad Fulton 

5License: BSD-3 

6""" 

7import numpy as np 

8import warnings 

9 

10from statsmodels.tools.tools import add_constant, Bunch 

11from statsmodels.regression.linear_model import OLS 

12from statsmodels.tsa.innovations import arma_innovations 

13from statsmodels.tsa.statespace.tools import diff 

14 

15from statsmodels.tsa.arima.estimators.yule_walker import yule_walker 

16from statsmodels.tsa.arima.estimators.burg import burg 

17from statsmodels.tsa.arima.estimators.hannan_rissanen import hannan_rissanen 

18from statsmodels.tsa.arima.estimators.innovations import ( 

19 innovations, innovations_mle) 

20from statsmodels.tsa.arima.estimators.statespace import statespace 

21 

22from statsmodels.tsa.arima.specification import SARIMAXSpecification 

23from statsmodels.tsa.arima.params import SARIMAXParams 

24 

25 

def gls(endog, exog=None, order=(0, 0, 0), seasonal_order=(0, 0, 0, 0),
        include_constant=None, n_iter=None, max_iter=50, tolerance=1e-8,
        arma_estimator='innovations_mle', arma_estimator_kwargs=None):
    """
    Estimate ARMAX parameters by feasible GLS.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    exog : array_like, optional
        Array of exogenous regressors. If not included, then `include_constant`
        must be True, and then `exog` will only include the constant column.
    order : tuple, optional
        The (p,d,q) order of the ARIMA model. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal ARIMA model.
        Default is (0, 0, 0, 0).
    include_constant : bool, optional
        Whether to add a constant term in `exog` if it's not already there.
        The estimate of the constant will then appear as one of the `exog`
        parameters. If `exog` is None, then the constant will represent the
        mean of the process. Default is True if the specified model does not
        include integration and False otherwise.
    n_iter : int, optional
        Optionally iterate feasible GLS a specific number of times. Default is
        to iterate to convergence. If set, this argument overrides the
        `max_iter` and `tolerance` arguments.
    max_iter : int, optional
        Maximum number of feasible GLS iterations. Default is 50. If `n_iter`
        is set, it overrides this argument.
    tolerance : float, optional
        Tolerance for determining convergence of feasible GLS iterations. If
        `n_iter` is set, this argument has no effect.
        Default is 1e-8.
    arma_estimator : str, optional
        The estimator used for estimating the ARMA model. This option should
        not generally be used, unless the default method is failing or is
        otherwise unsuitable. Not all values will be valid, depending on the
        specified model orders (`order` and `seasonal_order`). Possible values
        are:
        * 'innovations_mle' - can be used with any specification
        * 'statespace' - can be used with any specification
        * 'hannan_rissanen' - can be used with any ARMA non-seasonal model
        * 'yule_walker' - only non-seasonal consecutive
          autoregressive (AR) models
        * 'burg' - only non-seasonal, consecutive autoregressive (AR) models
        * 'innovations' - only non-seasonal, consecutive moving
          average (MA) models.
        The default is 'innovations_mle'.
    arma_estimator_kwargs : dict, optional
        Arguments to pass to the ARMA estimator.

    Returns
    -------
    parameters : SARIMAXParams object
        Contains the parameter estimates from the final iteration.
    other_results : Bunch
        Includes eight components: `spec`, `params`, `converged`,
        `differences`, `iterations`, `arma_estimator`, 'arma_estimator_kwargs',
        and `arma_results`.

    Raises
    ------
    ValueError
        If a constant is requested for an integrated model, if there are no
        regressors at all (`exog` is None and no constant is included), or if
        the estimated autoregressive parameters are non-stationary.

    Notes
    -----
    The primary reference is [1]_, section 6.6. In particular, the
    implementation follows the iterative procedure described in section 6.6.2.
    Construction of the transformed variables used to compute the GLS estimator
    described in section 6.6.1 is done via an application of the innovations
    algorithm (rather than explicit construction of the transformation matrix).

    Note that if the specified model includes integration, both the `endog` and
    `exog` series will be differenced prior to estimation and a warning will
    be issued to alert the user.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    # Handle n_iter: a fixed iteration count replaces the convergence rule
    if n_iter is not None:
        max_iter = n_iter
        tolerance = np.inf

    # Default for include_constant is True if there is no integration and
    # False otherwise
    integrated = order[1] > 0 or seasonal_order[1] > 0
    if include_constant is None:
        include_constant = not integrated
    elif include_constant and integrated:
        raise ValueError('Cannot include a constant in an integrated model.')

    # Handle including the constant (need to do it now so that the constant
    # parameter can be included in the specification as part of `exog`.)
    if include_constant:
        exog = np.ones_like(endog) if exog is None else add_constant(exog)

    # Without any regressor there is nothing for GLS to estimate; fail early
    # with a clear message instead of an obscure error further down.
    if exog is None:
        raise ValueError('GLS requires at least one regressor: provide `exog`'
                         ' or set `include_constant=True` (the latter is not'
                         ' available for integrated models).')

    # Create the SARIMAX specification
    spec = SARIMAXSpecification(endog, exog=exog, order=order,
                                seasonal_order=seasonal_order)
    endog = spec.endog
    exog = spec.exog

    # Handle integration
    if spec.is_integrated:
        # TODO: this is the approach suggested by BD (see Remark 1 in
        # section 6.6.2 and Example 6.6.3), but maybe there are some cases
        # where we don't want to force this behavior on the user?
        warnings.warn('Provided `endog` and `exog` series have been'
                      ' differenced to eliminate integration prior to GLS'
                      ' parameter estimation.', stacklevel=2)
        endog = diff(endog, k_diff=spec.diff,
                     k_seasonal_diff=spec.seasonal_diff,
                     seasonal_periods=spec.seasonal_periods)
        exog = diff(exog, k_diff=spec.diff,
                    k_seasonal_diff=spec.seasonal_diff,
                    seasonal_periods=spec.seasonal_periods)
    # First column is `endog`, remaining columns are `exog`; both are
    # transformed together in Step 3.
    augmented = np.c_[endog, exog]

    # Validate arma_estimator
    spec.validate_estimator(arma_estimator)
    if arma_estimator_kwargs is None:
        arma_estimator_kwargs = {}

    # Step 1: OLS
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    exog_params = res_ols.params
    resid = res_ols.resid

    # 0th iteration parameters: OLS coefficients with all ARMA terms at zero
    p = SARIMAXParams(spec=spec)
    p.exog_params = exog_params
    if spec.max_ar_order > 0:
        p.ar_params = np.zeros(spec.k_ar_params)
    if spec.max_seasonal_ar_order > 0:
        p.seasonal_ar_params = np.zeros(spec.k_seasonal_ar_params)
    if spec.max_ma_order > 0:
        p.ma_params = np.zeros(spec.k_ma_params)
    if spec.max_seasonal_ma_order > 0:
        p.seasonal_ma_params = np.zeros(spec.k_seasonal_ma_params)
    p.sigma2 = res_ols.scale

    ar_params = p.ar_params
    seasonal_ar_params = p.seasonal_ar_params
    ma_params = p.ma_params
    seasonal_ma_params = p.seasonal_ma_params
    sigma2 = p.sigma2

    # Step 2 - 4: iterate feasible GLS to convergence
    arma_results = [None]
    differences = [None]
    parameters = [p]
    # `converged` is only meaningful when iterating to convergence
    converged = False if n_iter is None else None
    # Keeps `i` defined for the results when the loop body never runs
    i = 0
    for i in range(1, max_iter + 1):
        prev = exog_params

        # Step 2: ARMA
        # TODO: allow estimator-specific kwargs?
        if arma_estimator == 'yule_walker':
            p_arma, res_arma = yule_walker(
                resid, ar_order=spec.ar_order, demean=False,
                **arma_estimator_kwargs)
        elif arma_estimator == 'burg':
            p_arma, res_arma = burg(resid, ar_order=spec.ar_order,
                                    demean=False, **arma_estimator_kwargs)
        elif arma_estimator == 'innovations':
            out, res_arma = innovations(resid, ma_order=spec.ma_order,
                                        demean=False, **arma_estimator_kwargs)
            p_arma = out[-1]
        elif arma_estimator == 'hannan_rissanen':
            p_arma, res_arma = hannan_rissanen(
                resid, ar_order=spec.ar_order, ma_order=spec.ma_order,
                demean=False, **arma_estimator_kwargs)
        else:
            # For later iterations, use a "warm start" for parameter estimates
            # (speeds up estimation and convergence)
            start_params = (
                None if i == 1 else np.r_[ar_params, ma_params,
                                          seasonal_ar_params,
                                          seasonal_ma_params, sigma2])
            # Note: in each case, we do not pass in the order of integration
            # since we have already differenced the series
            tmp_order = (spec.order[0], 0, spec.order[2])
            tmp_seasonal_order = (spec.seasonal_order[0], 0,
                                  spec.seasonal_order[2],
                                  spec.seasonal_order[3])
            if arma_estimator == 'innovations_mle':
                p_arma, res_arma = innovations_mle(
                    resid, order=tmp_order, seasonal_order=tmp_seasonal_order,
                    demean=False, start_params=start_params,
                    **arma_estimator_kwargs)
            else:
                p_arma, res_arma = statespace(
                    resid, order=tmp_order, seasonal_order=tmp_seasonal_order,
                    include_constant=False, start_params=start_params,
                    **arma_estimator_kwargs)

        ar_params = p_arma.ar_params
        seasonal_ar_params = p_arma.seasonal_ar_params
        ma_params = p_arma.ma_params
        seasonal_ma_params = p_arma.seasonal_ma_params
        sigma2 = p_arma.sigma2
        arma_results.append(res_arma)

        # Step 3: GLS
        # Compute transformed variables that satisfy OLS assumptions
        # Note: In section 6.6.1 of Brockwell and Davis (2016), these
        # transformations are developed as computed by left multiplication
        # by a matrix T. However, explicitly constructing T and then
        # performing the left-multiplications does not scale well when nobs is
        # large. Instead, we can retrieve the transformed variables as the
        # residuals of the innovations algorithm (the `normalize=True`
        # argument applies a Prais-Winsten-type normalization to the first few
        # observations to ensure homoskedasticity). Brockwell and Davis
        # mention that they also take this approach in practice.

        # The transformation is only defined for a stationary error process,
        # so reject non-stationary AR estimates explicitly.
        if not p_arma.is_stationary:
            raise ValueError(
                'Roots of the autoregressive parameters indicate that data is'
                ' non-stationary. GLS cannot be used with non-stationary'
                ' parameters. You should consider differencing the model data'
                ' or applying a nonlinear transformation (e.g., natural log).')

        # Use the reduced-form lag polynomials so that seasonal terms and
        # non-consecutive lag orders are reflected in the transformation
        # (the non-seasonal parameter vectors alone would ignore them).
        reduced_ar_params = -p_arma.reduced_ar_poly.coef[1:]
        reduced_ma_params = p_arma.reduced_ma_poly.coef[1:]
        tmp, _ = arma_innovations.arma_innovations(
            augmented, ar_params=reduced_ar_params,
            ma_params=reduced_ma_params, normalize=True)
        u = tmp[:, 0]
        x = tmp[:, 1:]

        # OLS on transformed variables
        mod_gls = OLS(u, x)
        res_gls = mod_gls.fit()
        exog_params = res_gls.params
        # Residuals are computed on the untransformed data; they feed the
        # ARMA step of the next iteration
        resid = endog - np.dot(exog, exog_params)

        # Construct the parameter vector for the iteration
        p = SARIMAXParams(spec=spec)
        p.exog_params = exog_params
        if spec.max_ar_order > 0:
            p.ar_params = ar_params
        if spec.max_seasonal_ar_order > 0:
            p.seasonal_ar_params = seasonal_ar_params
        if spec.max_ma_order > 0:
            p.ma_params = ma_params
        if spec.max_seasonal_ma_order > 0:
            p.seasonal_ma_params = seasonal_ma_params
        p.sigma2 = sigma2
        parameters.append(p)

        # Check for convergence (on the regression coefficients only)
        difference = np.abs(exog_params - prev)
        differences.append(difference)
        if n_iter is None and np.all(difference < tolerance):
            converged = True
            break
    else:
        # Reached only when the loop finished without `break`
        if n_iter is None:
            warnings.warn('Feasible GLS failed to converge in %d iterations.'
                          ' Consider increasing the maximum number of'
                          ' iterations using the `max_iter` argument or'
                          ' reducing the required tolerance using the'
                          ' `tolerance` argument.' % max_iter, stacklevel=2)

    # Construct final results
    p = parameters[-1]
    other_results = Bunch({
        'spec': spec,
        'params': parameters,
        'converged': converged,
        'differences': differences,
        'iterations': i,
        'arma_estimator': arma_estimator,
        'arma_estimator_kwargs': arma_estimator_kwargs,
        'arma_results': arma_results,
    })

    return p, other_results