Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

Run x12/x13-arima specs in a subprocess from Python and carry results back
into Python.

Notes
-----
Many of the functions are called x12. However, they are also intended to work
for x13. If this is not the case, it's a bug.

9""" 

10from statsmodels.compat.pandas import deprecate_kwarg 

11 

12import os 

13import subprocess 

14import tempfile 

15import re 

16from warnings import warn 

17 

18import pandas as pd 

19 

20from statsmodels.compat.python import iteritems 

21from statsmodels.tools.tools import Bunch 

22from statsmodels.tools.sm_exceptions import (X13NotFoundError, 

23 IOWarning, X13Error, 

24 X13Warning) 

25 

# Public entry points of this module.
__all__ = ["x13_arima_select_order", "x13_arima_analysis"]

# Executable names probed when looking for the program, in default
# preference order: the x13as names first, then the x12a names.
_binary_names = ('x13as.exe', 'x13as', 'x12a.exe', 'x12a')

29 

30 

31class _freq_to_period: 

32 def __getitem__(self, key): 

33 if key.startswith('M'): 

34 return 12 

35 elif key.startswith('Q'): 

36 return 4 

37 elif key.startswith('W'): 

38 return 52 

39 

40 

41_freq_to_period = _freq_to_period() 

42 

43_period_to_freq = {12: 'M', 4: 'Q'} 

44_log_to_x12 = {True: 'log', False: 'none', None: 'auto'} 

45_bool_to_yes_no = lambda x: 'yes' if x else 'no' # noqa:E731 

46 

47 

def _find_x12(x12path=None, prefer_x13=True):
    """
    Locate a launchable x13as or x12a binary.

    Parameters
    ----------
    x12path : str or None
        Directory that contains the binary, or the path of the binary
        itself (a trailing binary name is stripped). If None, the directory
        is taken from the ``X13PATH`` / ``X12PATH`` environment variables.
        If neither is set, the bare binary names are tried, so one of them
        must be resolvable by the operating system (e.g. via ``PATH``).
    prefer_x13 : bool
        If True, the x13as names are tried before the x12a names and
        ``X13PATH`` is consulted before ``X12PATH``. If False, both orders
        are reversed.

    Returns
    -------
    str or bool
        The path of the first candidate that could be launched, or False
        if none of them could.
    """
    # Work on a local name: the module-level tuple must never be rebound,
    # otherwise the search order would flip on every prefer_x13=False call.
    candidates = _binary_names

    if x12path is not None and x12path.endswith(candidates):
        # remove binary from path if given
        x12path = os.path.dirname(x12path)

    if prefer_x13:
        env_order = ("X13PATH", "X12PATH")
    else:  # search for x12 first
        candidates = candidates[::-1]
        env_order = ("X12PATH", "X13PATH")

    if x12path is None:
        x12path = os.getenv(env_order[0], "") or os.getenv(env_order[1], "")

    for binary in candidates:
        x12 = os.path.join(x12path, binary)
        try:
            subprocess.check_call(x12, stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
        except OSError:
            # not found / not executable at this location, try the next name
            continue
        return x12

    return False

82 

83 

def _check_x12(x12path=None, prefer_x13=True):
    """
    Return the path of a usable x12a/x13as binary or raise.

    Parameters
    ----------
    x12path : str or None
        Passed through to ``_find_x12``.
    prefer_x13 : bool
        Passed through to ``_find_x12``. Defaults to True, which matches
        the behaviour of calling ``_find_x12`` with its own default.

    Returns
    -------
    str
        The path reported by ``_find_x12``.

    Raises
    ------
    X13NotFoundError
        If ``_find_x12`` reports that no binary could be found.
    """
    found = _find_x12(x12path, prefer_x13=prefer_x13)
    if not found:
        raise X13NotFoundError("x12a and x13as not found on path. Give the "
                               "path, put them on PATH, or set the "
                               "X12PATH or X13PATH environmental variable.")
    return found

91 

92 

93def _clean_order(order): 

94 """ 

95 Takes something like (1 1 0)(0 1 1) and returns a arma order, sarma 

96 order tuple. Also accepts (1 1 0) and return arma order and (0, 0, 0) 

97 """ 

98 order = re.findall(r"\([0-9 ]*?\)", order) 

99 

100 def clean(x): 

101 return tuple(map(int, re.sub("[()]", "", x).split(" "))) 

102 

103 if len(order) > 1: 

104 order, sorder = map(clean, order) 

105 else: 

106 order = clean(order[0]) 

107 sorder = (0, 0, 0) 

108 

109 return order, sorder 

110 

111 

def run_spec(x12path, specpath, outname=None, meta=False, datameta=False):
    """
    Start the X-12/X-13 program on a spec file.

    Parameters
    ----------
    x12path : str
        Path of the executable to launch.
    specpath : str
        Path handed to the program as its input argument.
    outname : str, optional
        If given (and non-empty), appended as the last argument.
    meta : bool
        If True, ``-m`` is passed before ``specpath``.
    datameta : bool
        If True, ``-d`` is passed before ``specpath``. Cannot be combined
        with ``meta``.

    Returns
    -------
    subprocess.Popen
        The started process. Its stdout is a pipe and stderr is redirected
        into stdout.

    Raises
    ------
    ValueError
        If both ``meta`` and ``datameta`` are True.
    """
    if meta and datameta:
        raise ValueError("Cannot specify both meta and datameta.")

    args = [x12path]
    # The flag and the path must be separate argv entries; with a list of
    # arguments no shell splits a combined "-m path" string for us.
    if meta:
        args.append("-m")
    elif datameta:
        args.append("-d")
    args.append(specpath)

    if outname:
        args.append(outname)

    return subprocess.Popen(args, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)

128 

129 

130def _make_automdl_options(maxorder, maxdiff, diff): 

131 options = "\n" 

132 options += "maxorder = ({0} {1})\n".format(maxorder[0], maxorder[1]) 

133 if maxdiff is not None: # maxdiff always takes precedence 

134 options += "maxdiff = ({0} {1})\n".format(maxdiff[0], maxdiff[1]) 

135 else: 

136 options += "diff = ({0} {1})\n".format(diff[0], diff[1]) 

137 return options 

138 

139 

140def _make_var_names(exog): 

141 if hasattr(exog, "name"): 

142 var_names = exog.name 

143 elif hasattr(exog, "columns"): 

144 var_names = exog.columns 

145 else: 

146 raise ValueError("exog is not a Series or DataFrame or is unnamed.") 

147 try: 

148 var_names = " ".join(var_names) 

149 except TypeError: # cannot have names that are numbers, pandas default 

150 from statsmodels.base.data import _make_exog_names 

151 if exog.ndim == 1: 

152 var_names = "x1" 

153 else: 

154 var_names = " ".join(_make_exog_names(exog)) 

155 return var_names 

156 

157 

158def _make_regression_options(trading, exog): 

159 if not trading and exog is None: # start regression spec 

160 return "" 

161 

162 reg_spec = "regression{\n" 

163 if trading: 

164 reg_spec += " variables = (td)\n" 

165 if exog is not None: 

166 var_names = _make_var_names(exog) 

167 reg_spec += " user = ({0})\n".format(var_names) 

168 reg_spec += " data = ({0})\n".format("\n".join(map(str, 

169 exog.values.ravel().tolist()))) 

170 

171 reg_spec += "}\n" # close out regression spec 

172 return reg_spec 

173 

174 

175def _make_forecast_options(forecast_periods): 

176 if forecast_periods is None: 

177 return "" 

178 forecast_spec = "forecast{\n" 

179 forecast_spec += "maxlead = ({0})\n}}\n".format(forecast_periods) 

180 return forecast_spec 

181 

182 

def _check_errors(errors):
    """
    Inspect the text of an error file and raise or warn accordingly.

    Everything up to and including the first ``"spc:"`` is discarded when
    that marker is present; the remainder is stripped and then searched.

    Parameters
    ----------
    errors : str
        Content of the error file.

    Raises
    ------
    X13Error
        If the remaining text contains ``'ERROR'``.

    Warns
    -----
    X13Warning
        If the remaining text contains ``'WARNING'`` but not ``'ERROR'``.
    """
    marker = "spc:"
    position = errors.find(marker)
    # find() returns -1 when the marker is missing; slicing blindly from
    # ``-1 + len(marker)`` would drop the first characters of the message.
    if position != -1:
        errors = errors[position + len(marker):]
    errors = errors.strip()

    if not errors:
        return
    if 'ERROR' in errors:
        raise X13Error(errors)
    if 'WARNING' in errors:
        warn(errors, X13Warning)

189 

190 

191def _convert_out_to_series(x, dates, name): 

192 """ 

193 Convert x to a DataFrame where x is a string in the format given by 

194 x-13arima-seats output. 

195 """ 

196 from io import StringIO 

197 from pandas import read_csv 

198 out = read_csv(StringIO(x), skiprows=2, 

199 header=None, sep='\t', engine='python') 

200 return out.set_index(dates).rename(columns={1: name})[name] 

201 

202 

203def _open_and_read(fname): 

204 # opens a file, reads it, and make sure it's closed 

205 with open(fname, 'r') as fin: 

206 fout = fin.read() 

207 return fout 

208 

209 

class Spec(object):
    """
    Base class for one named section of a spec file.

    Subclasses are named ``<Something>Spec``; the section name is the class
    name with ``"Spec"`` removed. ``set_options`` must be called before
    ``create_spec`` because the latter reads ``self.options``.
    """

    @property
    def spec_name(self):
        """Class name with every occurrence of ``"Spec"`` removed."""
        return self.__class__.__name__.replace("Spec", "")

    def create_spec(self, **kwargs):
        """
        Return the section as text: ``<name> { <options> }``.

        Keyword arguments are accepted but not used.
        """
        # NOTE(review): the exact whitespace inside the original template
        # literal could not be recovered; only the layout differs, the
        # tokens are the same. Presumably insignificant to the spec parser.
        template = "{name} {{\n{options}\n}}\n"
        return template.format(name=self.spec_name,
                               options=self.options)

    def set_options(self, **kwargs):
        """
        Store each keyword as an attribute and build ``self.options``.

        ``self.options`` holds one ``key=value`` line per keyword. It is
        assigned last, so a keyword called ``options`` is overwritten by
        the generated text.
        """
        self.__dict__.update(kwargs)
        self.options = "".join("{0}={1}\n".format(key, value)
                               for key, value in kwargs.items())

229 

230 

class SeriesSpec(Spec):
    """
    The ``Series`` section of a spec file.

    Parameters
    ----------
    data
    name
    appendbcst : bool
    appendfcst : bool
    comptype
    compwt
    decimals
    modelspan
    period
    precision
    to_print
    to_save
    span
    start
    title
    series_type

    Notes
    -----
    Only ``data``, ``appendbcst``, ``appendfcst``, ``period``, ``start``,
    ``title`` and ``name`` are written to the spec. All other arguments are
    accepted but currently ignored.

    ``name`` is cut to 64 characters and ``title`` to 79 characters; both
    are wrapped in double quotes. The two booleans are written as ``yes``
    or ``no``.

    Rarely used arguments

    divpower
    missingcode
    missingval
    """
    # NOTE: to_print / to_save keep their list defaults for signature
    # compatibility; they are never read or modified here.
    def __init__(self, data, name='Unnamed Series', appendbcst=False,
                 appendfcst=False,
                 comptype=None, compwt=1, decimals=0, modelspan=(),
                 period=12, precision=0, to_print=[], to_save=[], span=(),
                 start=(1, 1), title='', series_type=None, divpower=None,
                 missingcode=-99999, missingval=1000000000):

        quoted_name = "\"{0}\"".format(name[:64])  # trim to 64 characters
        quoted_title = "\"{0}\"".format(title[:79])  # trim to 79 characters
        self.set_options(data=data,
                         appendbcst=_bool_to_yes_no(appendbcst),
                         appendfcst=_bool_to_yes_no(appendfcst),
                         period=period, start=start,
                         title=quoted_title, name=quoted_name,
                         )

279 

280 

def pandas_to_series_spec(x):
    """
    Build a ``SeriesSpec`` from a pandas Series or one-column DataFrame.

    Parameters
    ----------
    x : pandas.Series or pandas.DataFrame
        The data. A DataFrame must have exactly one column. The index must
        carry (or allow inferring) a monthly or quarterly frequency.

    Returns
    -------
    SeriesSpec
        Spec holding the values, the period (12 or 4), the start written as
        ``"<year>.<month or quarter>"`` and the series name as both name
        and title.

    Raises
    ------
    ValueError
        If ``x`` has more than one column, if no frequency can be
        determined, or if the frequency is neither monthly nor quarterly.
    """
    if hasattr(x, 'columns'):  # convert to series
        if len(x.columns) > 1:
            raise ValueError("Does not handle DataFrame with more than one "
                             "column")
        x = x[x.columns[0]]

    data = "({0})".format("\n".join(map(str, x.values.tolist())))

    # get periodicity
    try:
        period = _freq_to_period[x.index.freqstr]
    except (AttributeError, ValueError):
        # no usable freqstr on the index: fall back to inference
        from pandas.tseries.api import infer_freq
        inferred = infer_freq(x.index)
        if inferred is None:
            raise ValueError("Could not determine the frequency of the "
                             "index. Use an index with a monthly or "
                             "quarterly frequency.")
        period = _freq_to_period[inferred]

    # get start / first date
    start_date = x.index[0]
    if period == 12:
        year, stperiod = start_date.year, start_date.month
    elif period == 4:
        year, stperiod = start_date.year, start_date.quarter
    else:  # pragma: no cover
        raise ValueError("Only monthly and quarterly periods are supported."
                         " Please report or send a pull request if you want "
                         "this extended.")

    # give it a title; str() because the spec slices and quotes the name
    name = str(getattr(x, 'name', None) or "Unnamed Series")
    start = "{0}.{1}".format(year, stperiod)
    return SeriesSpec(data=data, name=name, period=period,
                      title=name, start=start)

318 

319 

@deprecate_kwarg('forecast_years', 'forecast_periods')
def x13_arima_analysis(endog, maxorder=(2, 1), maxdiff=(2, 1), diff=None,
                       exog=None, log=None, outlier=True, trading=False,
                       forecast_periods=None, retspec=False,
                       speconly=False, start=None, freq=None,
                       print_stdout=False, x12path=None, prefer_x13=True):
    """
    Perform x13-arima analysis for monthly or quarterly data.

    Parameters
    ----------
    endog : array_like, pandas.Series
        The series to model. It is best to use a pandas object with a
        DatetimeIndex or PeriodIndex. However, you can pass an array-like
        object. If your object does not have a dates index then ``start`` and
        ``freq`` are not optional.
    maxorder : tuple
        The maximum order of the regular and seasonal ARMA polynomials to
        examine during the model identification. The order for the regular
        polynomial must be greater than zero and no larger than 4. The
        order for the seasonal polynomial may be 1 or 2.
    maxdiff : tuple
        The maximum orders for regular and seasonal differencing in the
        automatic differencing procedure. Acceptable inputs for regular
        differencing are 1 and 2. The maximum order for seasonal differencing
        is 1. If ``diff`` is specified then ``maxdiff`` should be None.
        Otherwise, ``diff`` will be ignored. See also ``diff``.
    diff : tuple
        Fixes the orders of differencing for the regular and seasonal
        differencing. Regular differencing may be 0, 1, or 2. Seasonal
        differencing may be 0 or 1. ``maxdiff`` must be None, otherwise
        ``diff`` is ignored.
    exog : array_like
        Exogenous variables.
    log : bool or None
        If None, it is automatically determined whether to log the series or
        not. If False, logs are not taken. If True, logs are taken.
    outlier : bool
        Whether or not outliers are tested for and corrected, if detected.
    trading : bool
        Whether or not trading day effects are tested for.
    forecast_periods : int
        Number of forecasts produced. The default is None.
    retspec : bool
        Whether to return the created specification file. Can be useful for
        debugging.
    speconly : bool
        Whether to create the specification file and then return it without
        performing the analysis. Can be useful for debugging.
    start : str, datetime
        Must be given if ``endog`` does not have date information in its index.
        Anything accepted by pandas.date_range for the start value.
    freq : str
        Must be given if ``endog`` does not have date information in its
        index. Anything accepted by pandas.date_range for the freq value.
    print_stdout : bool
        The stdout from X12/X13 is suppressed. To print it out, set this
        to True. Default is False.
    x12path : str or None
        The path to x12 or x13 binary. If None, the program will attempt
        to find x13as or x12a on the PATH or by looking at X13PATH or
        X12PATH.
    prefer_x13 : bool
        Intended to select whether x13as or x12a is looked for first.
        NOTE: this function does not currently forward the value to the
        binary lookup, so it has no effect here.

    Returns
    -------
    X13ArimaAnalysisResult or str
        If ``speconly`` is True, only the specification text is returned.
        Otherwise a result object with the following attributes.

        - observed : pandas.Series
          The ``endog`` that was analysed.
        - results : str
          The full output from the X12/X13 run.
        - seasadj : pandas.Series
          The final seasonally adjusted ``endog``.
        - trend : pandas.Series
          The trend-cycle component of ``endog``.
        - irregular : pandas.Series
          The final irregular component of ``endog``.
        - stdout
          The raw output captured from the x12/x13 process pipe.
        - spec : str, optional
          Present only if ``retspec`` is True.

    Notes
    -----
    This works by creating a specification file, writing it to a temporary
    directory, invoking X12/X13 in a subprocess, and reading the output
    back in.
    """
    x12path = _check_x12(x12path)

    if not isinstance(endog, (pd.DataFrame, pd.Series)):
        if start is None or freq is None:
            raise ValueError("start and freq cannot be none if endog is not "
                             "a pandas object")
        # date_range replaces the DatetimeIndex(start=, periods=, freq=)
        # constructor form, which recent pandas versions no longer accept.
        index = pd.date_range(start=start, periods=len(endog), freq=freq)
        endog = pd.Series(endog, index=index)

    spec_obj = pandas_to_series_spec(endog)
    spec = spec_obj.create_spec()
    spec += "transform{{function={0}}}\n".format(_log_to_x12[log])
    if outlier:
        spec += "outlier{}\n"
    options = _make_automdl_options(maxorder, maxdiff, diff)
    spec += "automdl{{{0}}}\n".format(options)
    spec += _make_regression_options(trading, exog)
    spec += _make_forecast_options(forecast_periods)
    spec += "x11{ save=(d11 d12 d13) }"
    if speconly:
        return spec

    # write it to a tempfile
    # TODO: make this more robust - give the user some control?
    ftempin = tempfile.NamedTemporaryFile(delete=False, suffix='.spc')
    ftempout = tempfile.NamedTemporaryFile(delete=False)
    # files the program is asked to write next to the output base name
    out_exts = ('.err', '.out', '.d11', '.d12', '.d13')
    try:
        ftempin.write(spec.encode('utf8'))
        ftempin.close()
        ftempout.close()
        # call x12 arima; the program is given the spec path without ".spc"
        p = run_spec(x12path, ftempin.name[:-4], ftempout.name)
        # communicate() drains the pipe while waiting, so a chatty process
        # cannot block on a full pipe buffer
        stdout, _ = p.communicate()
        if print_stdout:
            print(stdout)
        # check for errors
        errors = _open_and_read(ftempout.name + '.err')
        _check_errors(errors)

        # read in results
        results = _open_and_read(ftempout.name + '.out')
        seasadj = _open_and_read(ftempout.name + '.d11')
        trend = _open_and_read(ftempout.name + '.d12')
        irregular = _open_and_read(ftempout.name + '.d13')
    finally:
        # make sure our own handles are closed before deleting (close() is
        # safe to call twice)
        ftempin.close()
        ftempout.close()
        leftovers = [ftempin.name, ftempout.name]
        leftovers.extend(ftempout.name + ext for ext in out_exts)
        for path in leftovers:
            try:  # sometimes this gives a permission denied error?
                # not sure why. no process should have these open
                os.remove(path)
            except OSError:
                # a file that never existed is fine; only report files
                # that are still there
                if os.path.exists(path):
                    warn("Failed to delete resource {0}".format(path),
                         IOWarning)

    seasadj = _convert_out_to_series(seasadj, endog.index, 'seasadj')
    trend = _convert_out_to_series(trend, endog.index, 'trend')
    irregular = _convert_out_to_series(irregular, endog.index, 'irregular')

    # NOTE: there is not likely anything in stdout that's not in results
    # so may be safe to just suppress and remove it
    res_kwargs = dict(observed=endog, results=results,
                      seasadj=seasadj, trend=trend,
                      irregular=irregular, stdout=stdout)
    if retspec:
        res_kwargs['spec'] = spec
    return X13ArimaAnalysisResult(**res_kwargs)

487 

488 

@deprecate_kwarg('forecast_years', 'forecast_periods')
def x13_arima_select_order(endog, maxorder=(2, 1), maxdiff=(2, 1), diff=None,
                           exog=None, log=None, outlier=True, trading=False,
                           forecast_periods=None,
                           start=None, freq=None, print_stdout=False,
                           x12path=None, prefer_x13=True):
    """
    Perform automatic seasonal ARIMA order identification using x12/x13 ARIMA.

    Parameters
    ----------
    endog : array_like, pandas.Series
        The series to model. It is best to use a pandas object with a
        DatetimeIndex or PeriodIndex. However, you can pass an array-like
        object. If your object does not have a dates index then ``start`` and
        ``freq`` are not optional.
    maxorder : tuple
        The maximum order of the regular and seasonal ARMA polynomials to
        examine during the model identification. The order for the regular
        polynomial must be greater than zero and no larger than 4. The
        order for the seasonal polynomial may be 1 or 2.
    maxdiff : tuple
        The maximum orders for regular and seasonal differencing in the
        automatic differencing procedure. Acceptable inputs for regular
        differencing are 1 and 2. The maximum order for seasonal differencing
        is 1. If ``diff`` is specified then ``maxdiff`` should be None.
        Otherwise, ``diff`` will be ignored. See also ``diff``.
    diff : tuple
        Fixes the orders of differencing for the regular and seasonal
        differencing. Regular differencing may be 0, 1, or 2. Seasonal
        differencing may be 0 or 1. ``maxdiff`` must be None, otherwise
        ``diff`` is ignored.
    exog : array_like
        Exogenous variables.
    log : bool or None
        If None, it is automatically determined whether to log the series or
        not. If False, logs are not taken. If True, logs are taken.
    outlier : bool
        Whether or not outliers are tested for and corrected, if detected.
    trading : bool
        Whether or not trading day effects are tested for.
    forecast_periods : int
        Number of forecasts produced. The default is None.
    start : str, datetime
        Must be given if ``endog`` does not have date information in its index.
        Passed through to ``x13_arima_analysis``.
    freq : str
        Must be given if ``endog`` does not have date information in its
        index. Passed through to ``x13_arima_analysis``.
    print_stdout : bool
        The stdout from X12/X13 is suppressed. To print it out, set this
        to True. Default is False.
    x12path : str or None
        The path to x12 or x13 binary. If None, the program will attempt
        to find x13as or x12a on the PATH or by looking at X13PATH or X12PATH
        depending on the value of prefer_x13.
    prefer_x13 : bool
        If True, will look for x13as first and will fallback to the X13PATH
        environmental variable. If False, will look for x12a first and will
        fallback to the X12PATH environmental variable. If x12path points
        to the path for the X12/X13 binary, it does nothing.

    Returns
    -------
    Bunch
        A bunch object containing the listed attributes.

        - order : tuple
          The regular order.
        - sorder : tuple
          The seasonal order.
        - include_mean : bool
          Whether to include a mean or not.
        - results : str
          The full results from the X12/X13 analysis.
        - stdout
          The captured stdout from the X12/X13 analysis.

    Raises
    ------
    X13Error
        If the output does not contain the final automatic model choice.

    Notes
    -----
    This works by creating a specification file, writing it to a temporary
    directory, invoking X12/X13 in a subprocess, and reading the output back
    in.
    """
    results = x13_arima_analysis(endog, x12path=x12path, exog=exog, log=log,
                                 outlier=outlier, trading=trading,
                                 forecast_periods=forecast_periods,
                                 maxorder=maxorder, maxdiff=maxdiff, diff=diff,
                                 start=start, freq=freq,
                                 print_stdout=print_stdout,
                                 prefer_x13=prefer_x13)
    output = results.results

    model = re.search("(?<=Final automatic model choice : ).*", output)
    if model is None:
        # fail with a clear message instead of an AttributeError on None
        raise X13Error("Could not find the final automatic model choice in "
                       "the X12/X13 output.")
    order, sorder = _clean_order(model.group())

    # a mean is included only when a constant is reported and it was not
    # flagged as insignificant
    if "Mean is not significant" in output:
        include_mean = False
    else:
        include_mean = "Constant" in output

    return Bunch(order=order, sorder=sorder, include_mean=include_mean,
                 results=output, stdout=results.stdout)

591 

592 

class X13ArimaAnalysisResult(object):
    """
    Plain container for the pieces of an X12/X13 analysis.

    Every keyword passed to the constructor becomes an attribute of the
    same name. ``plot`` expects the attributes ``observed``, ``seasadj``,
    ``trend`` and ``irregular`` to be present and to have a pandas-style
    ``plot`` method.
    """

    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

    def plot(self):
        """
        Plot the observed series and its components in four stacked panels.

        Returns
        -------
        matplotlib figure
            The figure holding the four axes, which share the x axis.
        """
        from statsmodels.graphics.utils import _import_mpl
        plt = _import_mpl()
        fig, axes = plt.subplots(4, 1, sharex=True)

        # attribute to draw -> y-axis label, top to bottom
        panels = (('observed', 'Observed'),
                  ('seasadj', 'Seas. Adjusted'),
                  ('trend', 'Trend'),
                  ('irregular', 'Irregular'))
        for ax, (attr, label) in zip(axes, panels):
            getattr(self, attr).plot(ax=ax, legend=False)
            ax.set_ylabel(label)

        fig.tight_layout()
        return fig