Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2SARIMAX specification class. 

3 

4Author: Chad Fulton 

5License: BSD-3 

6""" 

7import numpy as np 

8import pandas as pd 

9 

10from statsmodels.tools.data import _is_using_pandas 

11from statsmodels.tsa.base.tsa_model import TimeSeriesModel 

12from statsmodels.tsa.statespace.tools import ( 

13 is_invertible, constrain_stationary_univariate as constrain, 

14 unconstrain_stationary_univariate as unconstrain, 

15 prepare_exog, prepare_trend_spec, prepare_trend_data) 

16 

17from statsmodels.tsa.arima.tools import standardize_lag_order, validate_basic 

18 

19 

20class SARIMAXSpecification(object): 

21 """ 

22 SARIMAX specification. 

23 

24 Parameters 

25 ---------- 

26 endog : array_like, optional 

27 The observed time-series process :math:`y`. 

28 exog : array_like, optional 

29 Array of exogenous regressors. 

30 order : tuple, optional 

31 The (p,d,q) order of the model for the autoregressive, differences, and 

32 moving average components. d is always an integer, while p and q may 

33 either be integers or lists of integers. May not be used in combination 

34 with the arguments `ar_order`, `diff`, or `ma_order`. 

35 seasonal_order : tuple, optional 

36 The (P,D,Q,s) order of the seasonal component of the model for the 

37 AR parameters, differences, MA parameters, and periodicity. Default 

38 is (0, 0, 0, 0). D and s are always integers, while P and Q 

39 may either be integers or lists of positive integers. May not be used 

40 in combination with the arguments `seasonal_ar_order`, `seasonal_diff`, 

41 or `seasonal_ma_order`. 

42 ar_order : int or list of int 

43 The autoregressive order of the model. May be an integer, in which case 

44 all autoregressive lags up to and including it will be included. 

45 Alternatively, may be a list of integers specifying which lag orders 

46 are included. May not be used in combination with `order`. 

47 diff : int 

48 The order of integration of the model. May not be used in combination 

49 with `order`. 

50 ma_order : int or list of int 

51 The moving average order of the model. May be an integer or 

52 list of integers. See the documentation for `ar_order` for details. 

53 May not be used in combination with `order`. 

54 seasonal_ar_order : int or list of int 

55 The seasonal autoregressive order of the model. May be an integer or 

56 list of integers. See the documentation for `ar_order` for examples. 

57 Note that if `seasonal_periods = 4` and `seasonal_ar_order = 2`, then 

58 this implies that the overall model will include lags 4 and 8. 

59 May not be used in combination with `seasonal_order`. 

60 seasonal_diff : int 

61 The order of seasonal integration of the model. May not be used in 

62 combination with `seasonal_order`. 

63 seasonal_ma_order : int or list of int 

64 The moving average order of the model. May be an integer or 

65 list of integers. See the documentation for `ar_order` and 

66 `seasonal_ar_order` for additional details. May not be used in 

67 combination with `seasonal_order`. 

68 seasonal_periods : int 

69 Number of periods in a season. May not be used in combination with 

70 `seasonal_order`. 

71 enforce_stationarity : bool, optional 

72 Whether or not to require the autoregressive parameters to correspond 

73 to a stationary process. This is only possible in estimation by

74 numerical maximum likelihood. 

75 enforce_invertibility : bool, optional 

76 Whether or not to require the moving average parameters to correspond 

77 to an invertible process. This is only possible in estimation by 

78 numerical maximum likelihood. 

79 concentrate_scale : bool, optional 

80 Whether or not to concentrate the scale (variance of the error term) 

81 out of the likelihood. This reduces the number of parameters by one. 

82 This is only applicable when considering estimation by numerical 

83 maximum likelihood. 

84 dates : array-like of datetime, optional 

85 If no index is given by `endog` or `exog`, an array-like object of 

86 datetime objects can be provided. 

87 freq : str, optional 

88 If no index is given by `endog` or `exog`, the frequency of the 

89 time-series may be specified here as a Pandas offset or offset string. 

90 missing : str 

91 Available options are 'none', 'drop', and 'raise'. If 'none', no nan 

92 checking is done. If 'drop', any observations with nans are dropped. 

93 If 'raise', an error is raised. Default is 'none'. 

94 

95 Attributes 

96 ---------- 

97 order : tuple, optional 

98 The (p,d,q) order of the model for the autoregressive, differences, and 

99 moving average components. d is always an integer, while p and q may 

100 either be integers or lists of integers. 

101 seasonal_order : tuple, optional 

102 The (P,D,Q,s) order of the seasonal component of the model for the 

103 AR parameters, differences, MA parameters, and periodicity. Default 

104 is (0, 0, 0, 0). D and s are always integers, while P and Q 

105 may either be integers or lists of positive integers. 

106 ar_order : int or list of int 

107 The autoregressive order of the model. May be an integer, in which case 

108 all autoregressive lags up to and including it will be included. For 

109 example, if `ar_order = 3`, then the model will include lags 1, 2, 

110 and 3. Alternatively, may be a list of integers specifying exactly 

111 which lag orders are included. For example, if `ar_order = [1, 3]`, 

112 then the model will include lags 1 and 3 but will exclude lag 2. 

113 diff : int 

114 The order of integration of the model. 

115 ma_order : int or list of int 

116 The moving average order of the model. May be an integer or 

117 list of integers. See the documentation for `ar_order` for examples. 

118 seasonal_ar_order : int or list of int 

119 The seasonal autoregressive order of the model. May be an integer or 

120 list of integers. See the documentation for `ar_order` for examples. 

121 Note that if `seasonal_periods = 4` and `seasonal_ar_order = 2`, then 

122 this implies that the overall model will include lags 4 and 8. 

123 seasonal_diff : int 

124 The order of seasonal integration of the model. 

125 seasonal_ma_order : int or list of int 

126 The moving average order of the model. May be an integer or 

127 list of integers. See the documentation for `ar_order` and 

128 `seasonal_ar_order` for additional details. 

129 seasonal_periods : int 

130 Number of periods in a season. 

131 trend : str{'n','c','t','ct'} or iterable, optional 

132 Parameter controlling the deterministic trend polynomial :math:`A(t)`. 

133 Can be specified as a string where 'c' indicates a constant (i.e. a 

134 degree zero component of the trend polynomial), 't' indicates a 

135 linear trend with time, and 'ct' is both. Can also be specified as an 

136 iterable defining the polynomial as in `numpy.poly1d`, where 

137 `[1,1,0,1]` would denote :math:`a + bt + ct^3`. Default is to not 

138 include a trend component. 

139 ar_lags : list of int 

140 List of included autoregressive lags. If `ar_order` is a list, then 

141 `ar_lags == ar_order`. If `ar_lags = [1, 2]`, then the overall model 

142 will include the 1st and 2nd autoregressive lags. 

143 ma_lags : list of int 

144 List of included moving average lags. If `ma_order` is a list, then 

145 `ma_lags == ma_order`. If `ma_lags = [1, 2]`, then the overall model 

146 will include the 1st and 2nd moving average lags. 

147 seasonal_ar_lags : list of int 

148 List of included seasonal autoregressive lags. If `seasonal_ar_order` 

149 is a list, then `seasonal_ar_lags == seasonal_ar_order`. If 

150 `seasonal_periods = 4` and `seasonal_ar_lags = [1, 2]`, then the 

151 overall model will include the 4th and 8th autoregressive lags. 

152 seasonal_ma_lags : list of int 

153 List of included seasonal moving average lags. If `seasonal_ma_order` 

154 is a list, then `seasonal_ma_lags == seasonal_ma_order`. See the 

155 documentation to `seasonal_ar_lags` for examples. 

156 max_ar_order : int 

157 Largest included autoregressive lag. 

158 max_ma_order : int 

159 Largest included moving average lag. 

160 max_seasonal_ar_order : int 

161 Largest included seasonal autoregressive lag. 

162 max_seasonal_ma_order : int 

163 Largest included seasonal moving average lag. 

164 max_reduced_ar_order : int 

165 Largest lag in the reduced autoregressive polynomial. Equal to 

166 `max_ar_order + max_seasonal_ar_order * seasonal_periods`. 

167 max_reduced_ma_order : int 

168 Largest lag in the reduced moving average polynomial. Equal to 

169 `max_ma_order + max_seasonal_ma_order * seasonal_periods`. 

170 enforce_stationarity : bool 

171 Whether or not to transform the AR parameters to enforce stationarity 

172 in the autoregressive component of the model. This is only possible 

173 in estimation by numerical maximum likelihood. 

174 enforce_invertibility : bool 

175 Whether or not to transform the MA parameters to enforce invertibility 

176 in the moving average component of the model. This is only possible 

177 in estimation by numerical maximum likelihood. 

178 concentrate_scale : bool 

179 Whether or not to concentrate the variance (scale term) out of the 

180 log-likelihood function. This is only applicable when considering 

181 estimation by numerical maximum likelihood. 

182 is_ar_consecutive 

183 is_ma_consecutive 

184 is_integrated 

185 is_seasonal 

186 k_exog_params 

187 k_ar_params 

188 k_ma_params 

189 k_seasonal_ar_params 

190 k_seasonal_ma_params 

191 k_params 

192 exog_names 

193 ar_names 

194 ma_names 

195 seasonal_ar_names 

196 seasonal_ma_names 

197 param_names 

198 

199 Examples 

200 -------- 

201 >>> SARIMAXSpecification(order=(1, 0, 2)) 

202 SARIMAXSpecification(endog=y, order=(1, 0, 2)) 

203 

204 >>> SARIMAXSpecification(ar_order=1, ma_order=2)

205 SARIMAXSpecification(endog=y, order=(1, 0, 2)) 

206 

207 >>> SARIMAXSpecification(ar_order=1, seasonal_order=(1, 0, 0, 4))

208 SARIMAXSpecification(endog=y, order=(1, 0, 0), seasonal_order=(1, 0, 0, 4)) 

209 """ 

210 

def __init__(self, endog=None, exog=None, order=None,
             seasonal_order=None, ar_order=None, diff=None, ma_order=None,
             seasonal_ar_order=None, seasonal_diff=None,
             seasonal_ma_order=None, seasonal_periods=None, trend=None,
             enforce_stationarity=None, enforce_invertibility=None,
             concentrate_scale=None, trend_offset=1, dates=None, freq=None,
             missing='none'):
    """
    Validate the requested orders and data and store the specification.

    The non-seasonal order may be given either as the `order` tuple or
    through `ar_order` / `diff` / `ma_order`; the seasonal order either as
    the `seasonal_order` tuple or through `seasonal_ar_order` /
    `seasonal_diff` / `seasonal_ma_order` / `seasonal_periods`. Any
    component that is not given defaults to zero.

    Parameters
    ----------
    trend : optional
        Trend specification, forwarded unchanged to `prepare_trend_spec`.
    trend_offset : int, optional
        Stored as `self.trend_offset` and forwarded to
        `construct_trend_data` when a trend is included. Default is 1.
        NOTE(review): presumably the time value assigned to the first
        observation of the generated trend -- confirm against
        `construct_trend_data`.

    All other arguments are described in the class docstring.

    Raises
    ------
    ValueError
        If a tuple order is combined with any of its individual
        components; if `order` / `seasonal_order` do not have three / four
        elements; if a differencing order or the seasonal periodicity is
        negative or fractional; if the seasonal periodicity is 1; if
        seasonal terms are requested with a zero periodicity; if a lag
        appears in both the seasonal and non-seasonal polynomial; or if
        `endog` has more than one column.
    """
    # Basic parameters
    self.enforce_stationarity = enforce_stationarity
    self.enforce_invertibility = enforce_invertibility
    self.concentrate_scale = concentrate_scale
    self.trend_offset = trend_offset

    # Validate that we were not given conflicting specifications
    has_order = order is not None
    has_specific_order = (ar_order is not None or diff is not None or
                          ma_order is not None)
    has_seasonal_order = seasonal_order is not None
    has_specific_seasonal_order = (seasonal_ar_order is not None or
                                   seasonal_diff is not None or
                                   seasonal_ma_order is not None or
                                   seasonal_periods is not None)
    # The messages list every argument that can trigger the conflict,
    # including `diff` / `seasonal_diff`.
    if has_order and has_specific_order:
        raise ValueError('Cannot specify both `order` and any of'
                         ' `ar_order`, `diff`, or `ma_order`.')
    if has_seasonal_order and has_specific_seasonal_order:
        raise ValueError('Cannot specify both `seasonal_order` and any of'
                         ' `seasonal_ar_order`, `seasonal_diff`,'
                         ' `seasonal_ma_order`, or `seasonal_periods`.')

    # Compute `order`
    if has_specific_order:
        ar_order = 0 if ar_order is None else ar_order
        diff = 0 if diff is None else diff
        ma_order = 0 if ma_order is None else ma_order
        order = (ar_order, diff, ma_order)
    elif not has_order:
        order = (0, 0, 0)

    # Compute `seasonal_order`
    if has_specific_seasonal_order:
        seasonal_ar_order = (
            0 if seasonal_ar_order is None else seasonal_ar_order)
        seasonal_diff = 0 if seasonal_diff is None else seasonal_diff
        seasonal_ma_order = (
            0 if seasonal_ma_order is None else seasonal_ma_order)
        seasonal_periods = (
            0 if seasonal_periods is None else seasonal_periods)
        seasonal_order = (seasonal_ar_order, seasonal_diff,
                          seasonal_ma_order, seasonal_periods)
    elif not has_seasonal_order:
        seasonal_order = (0, 0, 0, 0)

    # Validate shapes of `order`, `seasonal_order`
    if len(order) != 3:
        raise ValueError('`order` argument must be an iterable with three'
                         ' elements.')
    if len(seasonal_order) != 4:
        raise ValueError('`seasonal_order` argument must be an iterable'
                         ' with four elements.')

    # Validate differencing parameters
    if order[1] < 0:
        raise ValueError('Cannot specify negative differencing.')
    if order[1] != int(order[1]):
        raise ValueError('Cannot specify fractional differencing.')
    if seasonal_order[1] < 0:
        raise ValueError('Cannot specify negative seasonal differencing.')
    if seasonal_order[1] != int(seasonal_order[1]):
        raise ValueError('Cannot specify fractional seasonal'
                         ' differencing.')
    if seasonal_order[3] < 0:
        raise ValueError('Cannot specify negative seasonal periodicity.')
    # Without this check the `int(...)` below would silently truncate a
    # fractional periodicity (e.g. 4.5 -> 4) and fit a different model.
    if seasonal_order[3] != int(seasonal_order[3]):
        raise ValueError('Cannot specify fractional seasonal'
                         ' periodicity.')

    # Standardize to integers or lists of integers
    order = (
        standardize_lag_order(order[0], 'AR'),
        int(order[1]),
        standardize_lag_order(order[2], 'MA'))
    seasonal_order = (
        standardize_lag_order(seasonal_order[0], 'seasonal AR'),
        int(seasonal_order[1]),
        standardize_lag_order(seasonal_order[2], 'seasonal MA'),
        int(seasonal_order[3]))

    # Validate seasonals
    if seasonal_order[3] == 1:
        raise ValueError('Seasonal periodicity must be greater than 1.')
    if ((seasonal_order[0] != 0 or seasonal_order[1] != 0 or
            seasonal_order[2] != 0) and seasonal_order[3] == 0):
        raise ValueError('Must include nonzero seasonal periodicity if'
                         ' including seasonal AR, MA, or differencing.')

    # Basic order
    self.order = order
    self.ar_order, self.diff, self.ma_order = order

    self.seasonal_order = seasonal_order
    (self.seasonal_ar_order, self.seasonal_diff, self.seasonal_ma_order,
     self.seasonal_periods) = seasonal_order

    # Lists of included lags: an integer order `k` expands to 1..k, a list
    # order is used as given.
    if isinstance(self.ar_order, list):
        self.ar_lags = self.ar_order
    else:
        self.ar_lags = np.arange(1, self.ar_order + 1).tolist()
    if isinstance(self.ma_order, list):
        self.ma_lags = self.ma_order
    else:
        self.ma_lags = np.arange(1, self.ma_order + 1).tolist()

    if isinstance(self.seasonal_ar_order, list):
        self.seasonal_ar_lags = self.seasonal_ar_order
    else:
        self.seasonal_ar_lags = (
            np.arange(1, self.seasonal_ar_order + 1).tolist())
    if isinstance(self.seasonal_ma_order, list):
        self.seasonal_ma_lags = self.seasonal_ma_order
    else:
        self.seasonal_ma_lags = (
            np.arange(1, self.seasonal_ma_order + 1).tolist())

    # Maximum lag orders (the last entry of each lag list, or 0 if empty)
    self.max_ar_order = self.ar_lags[-1] if self.ar_lags else 0
    self.max_ma_order = self.ma_lags[-1] if self.ma_lags else 0

    self.max_seasonal_ar_order = (
        self.seasonal_ar_lags[-1] if self.seasonal_ar_lags else 0)
    self.max_seasonal_ma_order = (
        self.seasonal_ma_lags[-1] if self.seasonal_ma_lags else 0)

    self.max_reduced_ar_order = (
        self.max_ar_order +
        self.max_seasonal_ar_order * self.seasonal_periods)
    self.max_reduced_ma_order = (
        self.max_ma_order +
        self.max_seasonal_ma_order * self.seasonal_periods)

    # Check that we don't have duplicate AR or MA lags from the seasonal
    # component (seasonal lag `i` corresponds to overall lag `i * s`)
    ar_lags = set(self.ar_lags)
    seasonal_ar_lags = set(np.array(self.seasonal_ar_lags)
                           * self.seasonal_periods)
    duplicate_ar_lags = ar_lags.intersection(seasonal_ar_lags)
    if duplicate_ar_lags:
        raise ValueError('Invalid model: autoregressive lag(s) %s are'
                         ' in both the seasonal and non-seasonal'
                         ' autoregressive components.'
                         % duplicate_ar_lags)

    ma_lags = set(self.ma_lags)
    seasonal_ma_lags = set(np.array(self.seasonal_ma_lags)
                           * self.seasonal_periods)
    duplicate_ma_lags = ma_lags.intersection(seasonal_ma_lags)
    if duplicate_ma_lags:
        raise ValueError('Invalid model: moving average lag(s) %s are'
                         ' in both the seasonal and non-seasonal'
                         ' moving average components.'
                         % duplicate_ma_lags)

    # Handle trend
    self.trend_poly, _ = prepare_trend_spec(trend)
    # This contains the included exponents of the trend polynomial,
    # where e.g. the constant term has exponent 0, a linear trend has
    # exponent 1, etc.
    self.trend_terms = np.where(self.trend_poly == 1)[0]
    # Trend order is either the degree of the trend polynomial, if all
    # exponents are included, or a list of included exponents. Here we need
    # to make a distinction between a degree zero polynomial (i.e. a
    # constant) and the zero polynomial (i.e. not even a constant). The
    # former has `trend_order = 0`, while the latter has
    # `trend_order = None`.
    self.k_trend = len(self.trend_terms)
    if self.k_trend == 0:
        self.trend_order = None
        self.trend_degree = None
    elif np.all(self.trend_terms == np.arange(len(self.trend_terms))):
        self.trend_order = self.trend_terms[-1]
        self.trend_degree = self.trend_terms[-1]
    else:
        self.trend_order = self.trend_terms
        self.trend_degree = self.trend_terms[-1]

    # Handle endog / exog
    # Standardize exog
    self.k_exog, exog = prepare_exog(exog)

    # Standardize endog (including creating a faux endog if necessary):
    # an all-NaN placeholder as long as `exog`, or empty without `exog`.
    faux_endog = endog is None
    if endog is None:
        endog = [] if exog is None else np.full(len(exog), np.nan)

    # Add trend data into exog (trend columns come first)
    nobs = len(endog) if exog is None else len(exog)
    if self.trend_order is not None:
        trend_data = self.construct_trend_data(nobs, trend_offset)
        if exog is None:
            exog = trend_data
        elif _is_using_pandas(exog, None):
            trend_data = pd.DataFrame(trend_data, index=exog.index,
                                      columns=self.construct_trend_names())
            exog = pd.concat([trend_data, exog], axis=1)
        else:
            exog = np.c_[trend_data, exog]

    # Create an underlying time series model, to handle endog / exog,
    # especially validating shapes, retrieving names, and potentially
    # providing us with a time series index
    self._model = TimeSeriesModel(endog, exog=exog, dates=dates, freq=freq,
                                  missing=missing)
    self.endog = None if faux_endog else self._model.endog
    self.exog = self._model.exog

    # Validate endog shape
    if not faux_endog and self.endog.ndim > 1 and self.endog.shape[1] > 1:
        raise ValueError('SARIMAX models require univariate `endog`. Got'
                         ' shape %s.' % str(self.endog.shape))

    # `None` (rather than False) when no real endog was supplied
    self._has_missing = (
        None if faux_endog else np.any(np.isnan(self.endog)))

432 

@property
def is_ar_consecutive(self):
    """
    (bool) Whether the autoregressive lag polynomial is consecutive.

    True when there is no seasonal autoregressive component and
    `ar_order` is stored as an integer rather than a list of lags.
    """
    no_seasonal_ar = self.max_seasonal_ar_order == 0
    order_is_integer = not isinstance(self.ar_order, list)
    return no_seasonal_ar and order_is_integer

442 

@property
def is_ma_consecutive(self):
    """
    (bool) Whether the moving average lag polynomial is consecutive.

    True when there is no seasonal moving average component and
    `ma_order` is stored as an integer rather than a list of lags.
    """
    no_seasonal_ma = self.max_seasonal_ma_order == 0
    order_is_integer = not isinstance(self.ma_order, list)
    return no_seasonal_ma and order_is_integer

452 

@property
def is_integrated(self):
    """
    (bool) Whether the model applies any differencing.

    True when either `diff` or `seasonal_diff` is positive.
    """
    has_regular_diff = self.diff > 0
    has_seasonal_diff = self.seasonal_diff > 0
    return has_regular_diff or has_seasonal_diff

461 

@property
def is_seasonal(self):
    """(bool) Whether `seasonal_periods` is nonzero."""
    periods = self.seasonal_periods
    return periods != 0

466 

@property
def k_exog_params(self):
    """(int) Number of exogenous parameters, one per entry of `exog_names`."""
    names = self.exog_names
    return len(names)

471 

@property
def k_ar_params(self):
    """(int) Number of non-seasonal autoregressive parameters (one per lag)."""
    lags = self.ar_lags
    return len(lags)

476 

@property
def k_ma_params(self):
    """(int) Number of non-seasonal moving average parameters (one per lag)."""
    lags = self.ma_lags
    return len(lags)

481 

@property
def k_seasonal_ar_params(self):
    """(int) Number of seasonal autoregressive parameters (one per lag)."""
    lags = self.seasonal_ar_lags
    return len(lags)

486 

@property
def k_seasonal_ma_params(self):
    """(int) Number of seasonal moving average parameters (one per lag)."""
    lags = self.seasonal_ma_lags
    return len(lags)

491 

@property
def k_params(self):
    """
    (int) Total number of model parameters.

    Sum of the exogenous, AR, MA, seasonal AR and seasonal MA parameter
    counts, plus one for the variance unless `concentrate_scale` is truthy.
    """
    component_counts = (
        self.k_exog_params,
        self.k_ar_params,
        self.k_ma_params,
        self.k_seasonal_ar_params,
        self.k_seasonal_ma_params,
    )
    total = sum(component_counts)
    if self.concentrate_scale:
        return total
    return total + 1

500 

@property
def exog_names(self):
    """
    (list of str) Names associated with exogenous parameters.

    Taken from the underlying model; an empty list when the model reports
    no exogenous names.
    """
    names = self._model.exog_names
    if names is None:
        return []
    return names

506 

@property
def ar_names(self):
    """(list of str) One 'ar.L<lag>' label per non-seasonal AR lag."""
    template = 'ar.L%d'
    return [template % lag for lag in self.ar_lags]

511 

@property
def ma_names(self):
    """(list of str) One 'ma.L<lag>' label per non-seasonal MA lag."""
    template = 'ma.L%d'
    return [template % lag for lag in self.ma_lags]

516 

@property
def seasonal_ar_names(self):
    """
    (list of str) Names of seasonal autoregressive parameters.

    Each label is 'ar.S.L<k>' where `k` is the seasonal lag multiplied by
    `seasonal_periods`.
    """
    period = self.seasonal_periods
    overall_lags = (lag * period for lag in self.seasonal_ar_lags)
    return ['ar.S.L%d' % lag for lag in overall_lags]

522 

@property
def seasonal_ma_names(self):
    """
    (list of str) Names of seasonal moving average parameters.

    Each label is 'ma.S.L<k>' where `k` is the seasonal lag multiplied by
    `seasonal_periods`.
    """
    period = self.seasonal_periods
    overall_lags = (lag * period for lag in self.seasonal_ma_lags)
    return ['ma.S.L%d' % lag for lag in overall_lags]

528 

@property
def param_names(self):
    """
    (list of str) Names of all model parameters.

    Ordered as exogenous, AR, MA, seasonal AR, seasonal MA, followed by
    'sigma2' unless `concentrate_scale` is truthy. A new list is returned
    on every access.
    """
    groups = (self.exog_names, self.ar_names, self.ma_names,
              self.seasonal_ar_names, self.seasonal_ma_names)
    names = []
    for group in groups:
        names.extend(group)
    if not self.concentrate_scale:
        names.append('sigma2')
    return names

537 

@property
def valid_estimators(self):
    """
    (set of str) Estimators that could be used with this specification.

    Starts from every known estimator and removes those that cannot handle
    the specification's missing data, parameter restrictions, lag
    structure, seasonality or scale concentration.

    Note: does not consider the presence of `exog` in determining valid
    estimators. If there are exogenous variables, then feasible Generalized
    Least Squares should be used through the `gls` estimator, and the
    `valid_estimators` are the estimators that could be passed as the
    `arma_estimator` argument to `gls`.
    """
    candidates = {'yule_walker', 'burg', 'innovations',
                  'hannan_rissanen', 'innovations_mle', 'statespace'}

    uses_ar = self.max_ar_order != 0
    uses_ma = self.max_ma_order != 0
    seasonal = self.seasonal_periods != 0

    # Only the state space form can handle missing data
    if self._has_missing:
        candidates &= {'statespace'}

    # Only numerical MLE estimators can enforce restrictions
    restricts_ar = self.enforce_stationarity and self.max_ar_order > 0
    restricts_ma = self.enforce_invertibility and self.max_ma_order > 0
    if restricts_ar or restricts_ma:
        candidates &= {'innovations_mle', 'statespace'}

    # Innovations: no AR, no non-consecutive MA, no seasonal
    if seasonal or uses_ar or not self.is_ma_consecutive:
        candidates.discard('innovations')
    # Yule-Walker/Burg: no MA, no non-consecutive AR, no seasonal
    if seasonal or uses_ma or not self.is_ar_consecutive:
        candidates -= {'yule_walker', 'burg'}
    # Hannan-Rissanen: no seasonal
    if seasonal:
        candidates.discard('hannan_rissanen')
    # Innovations MLE: cannot have enforce_stationarity=False or
    # concentrate_scale=True
    if self.concentrate_scale or self.enforce_stationarity is False:
        candidates.discard('innovations_mle')

    return candidates

582 

def validate_estimator(self, estimator):
    """
    Validate an SARIMA estimator.

    Parameters
    ----------
    estimator : str
        Name of the estimator to validate against the current state of
        the specification. Possible values are: 'yule_walker', 'burg',
        'innovations', 'hannan_rissanen', 'innovations_mle', 'statespace'.

    Raises
    ------
    ValueError
        If `estimator` is not one of the names above, or if the named
        estimator cannot be used with this specification.

    Notes
    -----
    This method will raise a `ValueError` if an invalid method is passed,
    and otherwise will return None. The estimator name is checked first,
    so an unknown name is always reported as such, whatever the state of
    the specification.

    This method does not consider the presence of `exog` in determining
    valid estimators. If there are exogenous variables, then feasible
    Generalized Least Squares should be used through the `gls` estimator,
    and a "valid" estimator is one that could be passed as the
    `arma_estimator` argument to `gls`.

    This method only uses the attributes `enforce_stationarity` and
    `concentrate_scale` to determine the validity of numerical maximum
    likelihood estimators. These only include 'innovations_mle' (which
    does not support `enforce_stationarity=False` or
    `concentrate_scale=True`) and 'statespace' (which supports all
    combinations of each).

    Examples
    --------
    >>> spec = SARIMAXSpecification(order=(1, 0, 2))

    >>> spec.validate_estimator('yule_walker')
    ValueError: Yule-Walker estimator does not support moving average
    components.

    >>> spec.validate_estimator('burg')
    ValueError: Burg estimator does not support moving average components.

    >>> spec.validate_estimator('innovations')
    ValueError: Innovations estimator does not support autoregressive
    components.

    >>> spec.validate_estimator('hannan_rissanen')  # returns None
    >>> spec.validate_estimator('innovations_mle')  # returns None
    >>> spec.validate_estimator('statespace')  # returns None

    >>> spec.validate_estimator('not_an_estimator')
    ValueError: "not_an_estimator" is not a valid estimator.
    """
    titles = {
        'yule_walker': 'Yule-Walker',
        'burg': 'Burg',
        'innovations': 'Innovations',
        'hannan_rissanen': 'Hannan-Rissanen',
        'innovations_mle': 'Innovations MLE',
        'statespace': 'State space'
    }

    # Reject unknown names up front. Otherwise the `titles[estimator]`
    # lookups below would surface a KeyError instead of the documented
    # ValueError whenever one of the generic checks fires first. Tuple
    # membership compares by equality, so unhashable input is also
    # reported as a ValueError.
    if estimator not in tuple(titles):
        raise ValueError('"%s" is not a valid estimator.' % (estimator,))
    title = titles[estimator]

    # State space form supports all variations of SARIMAX.
    if estimator == 'statespace':
        return

    has_ar = self.max_ar_order != 0
    has_ma = self.max_ma_order != 0
    has_seasonal = self.seasonal_periods != 0
    has_missing = self._has_missing

    # Only state space form can support missing data
    if has_missing:
        raise ValueError('%s estimator does not support missing'
                         ' values in `endog`.' % title)

    # Only state space and innovations MLE can enforce parameter
    # restrictions
    if estimator != 'innovations_mle':
        if self.max_ar_order > 0 and self.enforce_stationarity:
            raise ValueError('%s estimator cannot enforce a stationary'
                             ' autoregressive lag polynomial.'
                             % title)
        if self.max_ma_order > 0 and self.enforce_invertibility:
            raise ValueError('%s estimator cannot enforce an invertible'
                             ' moving average lag polynomial.'
                             % title)

    # Now go through specific disqualifications for each estimator
    if estimator in ('yule_walker', 'burg'):
        if has_seasonal:
            raise ValueError('%s estimator does not support seasonal'
                             ' components.' % title)
        if not self.is_ar_consecutive:
            raise ValueError('%s estimator does not support'
                             ' non-consecutive autoregressive lags.'
                             % title)
        if has_ma:
            raise ValueError('%s estimator does not support moving average'
                             ' components.' % title)
    elif estimator == 'innovations':
        if has_seasonal:
            raise ValueError('Innovations estimator does not support'
                             ' seasonal components.')
        if not self.is_ma_consecutive:
            raise ValueError('Innovations estimator does not support'
                             ' non-consecutive moving average lags.')
        if has_ar:
            raise ValueError('Innovations estimator does not support'
                             ' autoregressive components.')
    elif estimator == 'hannan_rissanen':
        if has_seasonal:
            raise ValueError('Hannan-Rissanen estimator does not support'
                             ' seasonal components.')
    elif estimator == 'innovations_mle':
        if self.enforce_stationarity is False:
            raise ValueError('Innovations MLE estimator does not support'
                             ' non-stationary autoregressive components,'
                             ' but `enforce_stationarity` is set to False')
        if self.concentrate_scale:
            raise ValueError('Innovations MLE estimator does not support'
                             ' concentrating the scale out of the'
                             ' log-likelihood function')

705 

def split_params(self, params, allow_infnan=False):
    """
    Split a joint parameter vector into its named components.

    Parameters
    ----------
    params : array_like
        Array of model parameters, checked by `validate_basic` against the
        expected length `k_params`.
    allow_infnan : bool, optional
        Whether or not to allow `params` to contain -np.Inf, np.Inf, and
        np.nan. Default is False.

    Returns
    -------
    split_params : dict
        Dictionary with keys 'exog_params', 'ar_params', 'ma_params',
        'seasonal_ar_params', 'seasonal_ma_params', and (unless
        `concentrate_scale=True`) 'sigma2'. Each value is the slice of
        `params` belonging to that component; 'sigma2' is returned as a
        scalar.

    Examples
    --------
    >>> spec = SARIMAXSpecification(ar_order=1)
    >>> spec.split_params([0.5, 4])
    {'exog_params': array([], dtype=float64),
     'ar_params': array([0.5]),
     'ma_params': array([], dtype=float64),
     'seasonal_ar_params': array([], dtype=float64),
     'seasonal_ma_params': array([], dtype=float64),
     'sigma2': 4.0}
    """
    params = validate_basic(params, self.k_params,
                            allow_infnan=allow_infnan,
                            title='joint parameters')

    # (name, number of parameters) for each component, in storage order
    layout = [
        ('exog_params', self.k_exog_params),
        ('ar_params', self.k_ar_params),
        ('ma_params', self.k_ma_params),
        ('seasonal_ar_params', self.k_seasonal_ar_params),
        ('seasonal_ma_params', self.k_seasonal_ma_params),
    ]
    if not self.concentrate_scale:
        layout.append(('sigma2', 1))

    labels = [label for label, _ in layout]
    # Cumulative sizes are the end offsets of each component. `np.split`
    # also yields one trailing (empty) piece that `zip` drops.
    boundaries = np.cumsum([size for _, size in layout])
    pieces = np.split(params, boundaries)

    split = dict(zip(labels, pieces))
    if 'sigma2' in split:
        split['sigma2'] = split['sigma2'].item()

    return split

755 

def join_params(self, exog_params=None, ar_params=None, ma_params=None,
                seasonal_ar_params=None, seasonal_ma_params=None,
                sigma2=None):
    """
    Join parameters into a single vector.

    Components that are not part of the specification are skipped, and
    any value passed for them is ignored.

    Parameters
    ----------
    exog_params : array_like, optional
        Parameters associated with exogenous regressors. Required if
        `exog` is part of specification.
    ar_params : array_like, optional
        Parameters associated with (non-seasonal) autoregressive component.
        Required if this component is part of the specification.
    ma_params : array_like, optional
        Parameters associated with (non-seasonal) moving average component.
        Required if this component is part of the specification.
    seasonal_ar_params : array_like, optional
        Parameters associated with seasonal autoregressive component.
        Required if this component is part of the specification.
    seasonal_ma_params : array_like, optional
        Parameters associated with seasonal moving average component.
        Required if this component is part of the specification.
    sigma2 : array_like, optional
        Innovation variance parameter. Required unless
        `concentrate_scale=True`.

    Returns
    -------
    params : ndarray
        Array of parameters, ordered as exogenous, AR, MA, seasonal AR,
        seasonal MA, variance. An empty array if the specification has no
        parameters at all.

    Raises
    ------
    ValueError
        If a required component is missing or its shape does not match
        the number of parameters in the specification.

    Examples
    --------
    >>> spec = SARIMAXSpecification(ar_order=1)
    >>> spec.join_params(ar_params=0.5, sigma2=4)
    array([0.5, 4. ])
    """
    # (title for error messages, expected count, supplied values)
    definitions = [
        ('exogenous variables', self.k_exog_params, exog_params),
        ('AR terms', self.k_ar_params, ar_params),
        ('MA terms', self.k_ma_params, ma_params),
        ('seasonal AR terms', self.k_seasonal_ar_params,
         seasonal_ar_params),
        ('seasonal MA terms', self.k_seasonal_ma_params,
         seasonal_ma_params),
        ('variance', int(not self.concentrate_scale), sigma2)]

    params_list = []
    for title, k, params in definitions:
        if k <= 0:
            continue

        # Validate
        if params is None:
            raise ValueError('Specification includes %s, but no'
                             ' parameters were provided.' % title)
        # Accept scalars and arrays with extra singleton dimensions
        params = np.atleast_1d(np.squeeze(params))
        if params.shape != (k,):
            raise ValueError('Specification included %d %s, but'
                             ' parameters with shape %s were provided.'
                             % (k, title, params.shape))

        # Otherwise add to the list
        params_list.append(params)

    # `np.concatenate` raises on an empty sequence, which happens for a
    # specification without any parameters (no components and a
    # concentrated scale).
    if not params_list:
        return np.array([], dtype=float)

    return np.concatenate(params_list)

821 

def validate_params(self, params):
    """
    Check a parameter vector, raising ValueError if it is not valid.

    Parameters
    ----------
    params : array_like
        Array of model parameters.

    Raises
    ------
    ValueError
        If any of the checks described in the Notes fails.

    Notes
    -----
    The vector is first passed through `split_params`, which performs the
    basic validation. Then, in this order:

    - if `enforce_stationarity=True`, the autoregressive and seasonal
      autoregressive polynomials are checked;
    - if `enforce_invertibility=True`, the moving average and seasonal
      moving average polynomials are checked;
    - unless `concentrate_scale=True`, the variance term must be positive.

    Components that are not part of the specification are not checked.

    Examples
    --------
    >>> spec = SARIMAXSpecification(ar_order=1)
    >>> spec.validate_params([-0.5, 4.])  # returns None
    >>> spec.validate_params([-0.5, -2])
    ValueError: Non-positive variance term.
    >>> spec.validate_params([-1.5, 4.])
    ValueError: Non-stationary autoregressive polynomial.
    """
    # Note: split_params includes basic validation
    split = self.split_params(params)

    if self.enforce_stationarity:
        # AR coefficients enter the lag polynomial with a negative sign
        ar_checks = (
            (self.k_ar_params, 'ar_params',
             'Non-stationary autoregressive'
             ' polynomial.'),
            (self.k_seasonal_ar_params, 'seasonal_ar_params',
             'Non-stationary seasonal autoregressive'
             ' polynomial.'),
        )
        for count, key, message in ar_checks:
            if count and not is_invertible(np.r_[1, -split[key]]):
                raise ValueError(message)

    if self.enforce_invertibility:
        # MA coefficients enter the lag polynomial as given
        ma_checks = (
            (self.k_ma_params, 'ma_params',
             'Non-invertible moving average'
             ' polynomial.'),
            (self.k_seasonal_ma_params, 'seasonal_ma_params',
             'Non-invertible seasonal moving average'
             ' polynomial.'),
        )
        for count, key, message in ma_checks:
            if count and not is_invertible(np.r_[1, split[key]]):
                raise ValueError(message)

    if not self.concentrate_scale and split['sigma2'] <= 0:
        raise ValueError('Non-positive variance term.')

880 

def constrain_params(self, unconstrained):
    """
    Map unconstrained parameter values to values that give a valid model.

    Parameters
    ----------
    unconstrained : array_like
        Array of model unconstrained parameters.

    Returns
    -------
    constrained : ndarray
        Array of model parameters transformed to produce a valid model.

    Notes
    -----
    This is usually only used when performing numerical minimization
    of the log-likelihood function. This function is necessary because
    the minimizers consider values over the entire real space, while
    SARIMAX models require parameters in subspaces (for example positive
    variances).

    Exogenous parameters are passed through unchanged. The (seasonal)
    autoregressive parameters are transformed only if
    `enforce_stationarity=True`, the (seasonal) moving average parameters
    only if `enforce_invertibility=True`, and the variance term is squared
    unless `concentrate_scale=True`.

    Examples
    --------
    >>> spec = SARIMAXSpecification(ar_order=1)
    >>> spec.constrain_params([10, -2])
    array([-0.99504, 4. ])
    """
    split = self.split_params(unconstrained)
    constrained = {}

    if self.k_exog_params:
        constrained['exog_params'] = split['exog_params']

    if self.k_ar_params:
        ar = split['ar_params']
        constrained['ar_params'] = (
            constrain(ar) if self.enforce_stationarity else ar)

    if self.k_ma_params:
        ma = split['ma_params']
        constrained['ma_params'] = (
            -constrain(ma) if self.enforce_invertibility else ma)

    if self.k_seasonal_ar_params:
        seasonal_ar = split['seasonal_ar_params']
        constrained['seasonal_ar_params'] = (
            constrain(seasonal_ar) if self.enforce_stationarity
            else seasonal_ar)

    if self.k_seasonal_ma_params:
        seasonal_ma = split['seasonal_ma_params']
        constrained['seasonal_ma_params'] = (
            -constrain(seasonal_ma) if self.enforce_invertibility
            else seasonal_ma)

    if not self.concentrate_scale:
        # Squaring keeps the variance non-negative
        constrained['sigma2'] = split['sigma2']**2

    return self.join_params(**constrained)

942 

def unconstrain_params(self, constrained):
    """
    Reverse the transformations applied by `constrain_params`.

    Parameters
    ----------
    constrained : array_like
        Array of model parameters.

    Returns
    -------
    unconstrained : ndarray
        Array of parameters with constraining transformations reversed.

    Notes
    -----
    This is usually only used when performing numerical minimization
    of the log-likelihood function. This function is the (approximate)
    inverse of `constrain_params`.

    Exogenous parameters are passed through unchanged. The (seasonal)
    autoregressive parameters are transformed only if
    `enforce_stationarity=True`, the (seasonal) moving average parameters
    only if `enforce_invertibility=True`, and the square root of the
    variance term is taken unless `concentrate_scale=True`.

    Examples
    --------
    >>> spec = SARIMAXSpecification(ar_order=1)
    >>> spec.unconstrain_params([-0.5, 4.])
    array([0.57735, 2. ])
    """
    split = self.split_params(constrained)
    unconstrained = {}

    if self.k_exog_params:
        unconstrained['exog_params'] = split['exog_params']

    if self.k_ar_params:
        ar = split['ar_params']
        unconstrained['ar_params'] = (
            unconstrain(ar) if self.enforce_stationarity else ar)

    if self.k_ma_params:
        ma = split['ma_params']
        unconstrained['ma_params'] = (
            unconstrain(-ma) if self.enforce_invertibility else ma)

    if self.k_seasonal_ar_params:
        seasonal_ar = split['seasonal_ar_params']
        unconstrained['seasonal_ar_params'] = (
            unconstrain(seasonal_ar) if self.enforce_stationarity
            else seasonal_ar)

    if self.k_seasonal_ma_params:
        seasonal_ma = split['seasonal_ma_params']
        unconstrained['seasonal_ma_params'] = (
            unconstrain(-seasonal_ma) if self.enforce_invertibility
            else seasonal_ma)

    if not self.concentrate_scale:
        # Square root undoes the squaring done when constraining
        unconstrained['sigma2'] = split['sigma2']**0.5

    return self.join_params(**unconstrained)

1002 

def construct_trend_data(self, nobs, offset=1):
    """
    Construct trend data for this specification.

    Parameters
    ----------
    nobs : int
        Forwarded to `prepare_trend_data`; presumably the number of
        observations to generate trend data for (confirm against that
        helper).
    offset : int, optional
        Forwarded to `prepare_trend_data`; presumably the time index of
        the first observation (confirm against that helper). Default is 1.

    Returns
    -------
    trend_data : object or None
        None if `trend_order` is None; otherwise the value returned by
        `prepare_trend_data`.
    """
    if self.trend_order is None:
        return None

    # Second argument is the sum of the trend polynomial entries
    # (presumably the number of included trend terms -- TODO confirm).
    k_trend = int(np.sum(self.trend_poly))
    return prepare_trend_data(self.trend_poly, k_trend, nobs, offset)

1011 

def construct_trend_names(self):
    """
    Build the list of names for the trend terms.

    Returns
    -------
    names : list of str
        One name per entry of `trend_terms`, in order: 'const' for 0,
        'drift' for 1, and 'trend.<i>' for any other value `i`.
    """
    return [
        'const' if term == 0
        else 'drift' if term == 1
        else 'trend.%d' % term
        for term in self.trend_terms
    ]

1022 

def __repr__(self):
    """
    Represent SARIMAXSpecification object as a string.

    The string lists, in order: the endog name (if `endog` is set), the
    exog names (if there are exogenous parameters), the order, the
    seasonal order (if `seasonal_periods` is positive), and each of
    `enforce_stationarity`, `enforce_invertibility` and
    `concentrate_scale` that is not None.
    """
    parts = []

    if self.endog is not None:
        parts += ['endog=%s' % self._model.endog_names]
    if self.k_exog_params:
        parts += ['exog=%s' % self.exog_names]

    # str() keeps the tuple from being unpacked by the % operator
    parts += ['order=%s' % str(self.order)]
    if self.seasonal_periods > 0:
        parts += ['seasonal_order=%s' % str(self.seasonal_order)]

    flags = (
        ('enforce_stationarity=%s', self.enforce_stationarity),
        ('enforce_invertibility=%s', self.enforce_invertibility),
        ('concentrate_scale=%s', self.concentrate_scale),
    )
    for template, value in flags:
        if value is not None:
            parts.append(template % value)

    return 'SARIMAXSpecification(%s)' % ', '.join(parts)