Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Seasonal Decomposition by Moving Averages 

3""" 

4from statsmodels.compat.pandas import deprecate_kwarg 

5 

6import numpy as np 

7import pandas as pd 

8from pandas.core.nanops import nanmean as pd_nanmean 

9from statsmodels.tsa._stl import STL 

10 

11from statsmodels.tools.validation import array_like, PandasWrapper 

12from statsmodels.tsa.tsatools import freq_to_period 

13from .filters.filtertools import convolution_filter 

14 

15__all__ = ['STL', 'seasonal_decompose', 'seasonal_mean', 'DecomposeResult'] 

16 

17 

18def _extrapolate_trend(trend, npoints): 

19 """ 

20 Replace nan values on trend's end-points with least-squares extrapolated 

21 values with regression considering npoints closest defined points. 

22 """ 

23 front = next(i for i, vals in enumerate(trend) 

24 if not np.any(np.isnan(vals))) 

25 back = trend.shape[0] - 1 - next(i for i, vals in enumerate(trend[::-1]) 

26 if not np.any(np.isnan(vals))) 

27 front_last = min(front + npoints, back) 

28 back_first = max(front, back - npoints) 

29 

30 k, n = np.linalg.lstsq( 

31 np.c_[np.arange(front, front_last), np.ones(front_last - front)], 

32 trend[front:front_last], rcond=-1)[0] 

33 extra = (np.arange(0, front) * np.c_[k] + np.c_[n]).T 

34 if trend.ndim == 1: 

35 extra = extra.squeeze() 

36 trend[:front] = extra 

37 

38 k, n = np.linalg.lstsq( 

39 np.c_[np.arange(back_first, back), np.ones(back - back_first)], 

40 trend[back_first:back], rcond=-1)[0] 

41 extra = (np.arange(back + 1, trend.shape[0]) * np.c_[k] + np.c_[n]).T 

42 if trend.ndim == 1: 

43 extra = extra.squeeze() 

44 trend[back + 1:] = extra 

45 

46 return trend 

47 

48 

@deprecate_kwarg('freq', 'period')
def seasonal_mean(x, period):
    """
    Average the observations that share a position within the cycle.

    Parameters
    ----------
    x : array_like
        Data to average; it is sliced as ``x[i::period]``.
    period : int
        Number of observations per cycle, e.g. 12 for monthly data.

    Returns
    -------
    ndarray
        Array whose i-th entry is the mean along axis 0 of
        ``x[i::period]``. NaNs are ignored in the mean.
    """
    means = []
    for offset in range(period):
        # Every ``period``-th observation, starting at this cycle position.
        same_phase = x[offset::period]
        means.append(pd_nanmean(same_phase, axis=0))
    return np.array(means)

57 

58 

@deprecate_kwarg('freq', 'period')
def seasonal_decompose(x, model="additive", filt=None, period=None,
                       two_sided=True, extrapolate_trend=0):
    """
    Decompose a series into trend, seasonal and residual parts using
    moving averages.

    Parameters
    ----------
    x : array_like
        Time series. If 2d, individual series are in columns. x must
        contain 2 complete cycles.
    model : {"additive", "multiplicative"}, optional
        Type of seasonal component. Only the leading character is
        inspected: a value starting with "m" selects the multiplicative
        model, anything else is handled as additive.
    filt : array_like, optional
        Filter coefficients used to remove the seasonal component. When
        omitted, a moving average spanning one period is built; for an
        even period the two end weights are halved.
    period : int, optional
        Period of the series. Required if x is not a pandas object or if
        the index of x does not have a frequency. When given, it is used
        instead of the periodicity implied by the index of x.
    two_sided : bool, optional
        The moving average method used in filtering. If True (default), a
        centered moving average is computed using the filt. If False, the
        filter coefficients are for past values only.
    extrapolate_trend : int or 'freq', optional
        If set to > 0, the trend resulting from the convolution is linear
        least-squares extrapolated on both ends (or the single one if
        two_sided is False) considering this many (+1) closest points.
        If set to 'freq', ``period - 1`` is used as this value. Setting
        this parameter results in no NaN values in trend or resid
        components.

    Returns
    -------
    DecomposeResult
        An object with seasonal, trend, and resid attributes.

    Raises
    ------
    ValueError
        If x contains non-finite values, if a multiplicative model is
        requested for data with zero or negative values, if no period is
        given and none can be taken from the index of x, or if x has
        fewer than two full periods of observations.

    See Also
    --------
    statsmodels.tsa.filters.bk_filter.bkfilter
        Baxter-King filter.
    statsmodels.tsa.filters.cf_filter.cffilter
        Christiano-Fitzgerald asymmetric, random walk filter.
    statsmodels.tsa.filters.hp_filter.hpfilter
        Hodrick-Prescott filter.
    statsmodels.tsa.filters.convolution_filter
        Linear filtering via convolution.
    statsmodels.tsa.seasonal.STL
        Season-Trend decomposition using LOESS.

    Notes
    -----
    This is a naive decomposition. More sophisticated methods should
    be preferred.

    The additive model is Y[t] = T[t] + S[t] + e[t]

    The multiplicative model is Y[t] = T[t] * S[t] * e[t]

    The trend is obtained by applying a convolution filter to the data.
    After removing the trend, the values are averaged per position in the
    cycle; those averages, normalized across the cycle and repeated over
    the sample, form the returned seasonal component.
    """
    # Capture the pandas metadata (and any index frequency) before x is
    # converted to a plain array.
    wrapper = PandasWrapper(x)
    inferred = None
    if period is None:
        index = getattr(x, 'index', None)
        inferred = getattr(index, 'inferred_freq', None)

    x = array_like(x, 'x', maxdim=2)
    nobs = len(x)

    if not np.all(np.isfinite(x)):
        raise ValueError("This function does not handle missing values")
    multiplicative = model.startswith('m')
    if multiplicative and np.any(x <= 0):
        raise ValueError("Multiplicative seasonality is not appropriate "
                         "for zero and negative values")

    if period is None:
        if inferred is None:
            raise ValueError("You must specify a period or x must be a "
                             "pandas object with a DatetimeIndex with "
                             "a freq not set to None")
        period = freq_to_period(inferred)
    if x.shape[0] < 2 * period:
        raise ValueError('x must have 2 complete cycles requires {0} '
                         'observations. x only has {1} '
                         'observation(s)'.format(2 * period, x.shape[0]))

    if filt is None:
        if period % 2 != 0:
            filt = np.repeat(1. / period, period)
        else:
            # Even period: split weights at ends.
            filt = np.array([.5] + [1] * (period - 1) + [.5]) / period

    trend = convolution_filter(x, filt, int(two_sided) + 1)

    if extrapolate_trend == 'freq':
        extrapolate_trend = period - 1
    if extrapolate_trend > 0:
        trend = _extrapolate_trend(trend, extrapolate_trend + 1)

    # Remove the trend, average by cycle position, then normalize the
    # averages across the cycle (ratio or difference to their mean).
    if multiplicative:
        detrended = x / trend
        period_averages = seasonal_mean(detrended, period)
        period_averages /= np.mean(period_averages, axis=0)
    else:
        detrended = x - trend
        period_averages = seasonal_mean(detrended, period)
        period_averages -= np.mean(period_averages, axis=0)

    # Repeat one cycle often enough to cover the sample, then trim.
    seasonal = np.tile(period_averages.T, nobs // period + 1).T[:nobs]

    if multiplicative:
        resid = x / seasonal / trend
    else:
        resid = detrended - seasonal

    return DecomposeResult(
        seasonal=wrapper.wrap(seasonal.squeeze(), columns='seasonal'),
        trend=wrapper.wrap(trend.squeeze(), columns='trend'),
        resid=wrapper.wrap(resid.squeeze(), columns='resid'),
        observed=wrapper.wrap(x.squeeze(), columns=None))

190 

191 

class DecomposeResult(object):
    """
    Results class for seasonal decompositions

    Parameters
    ----------
    observed : array_like
        The data series that has been decomposed.
    seasonal : array_like
        The seasonal component of the data series.
    trend : array_like
        The trend component of the data series.
    resid : array_like
        The residual component of the data series.
    weights : array_like, optional
        The weights used to reduce outlier influence. When omitted, an
        array of ones shaped like ``observed`` is stored instead.
    """
    def __init__(self, observed, seasonal, trend, resid, weights=None):
        self._seasonal = seasonal
        self._trend = trend
        if weights is None:
            weights = np.ones_like(observed)
            if isinstance(observed, pd.Series):
                # Keep the default weights aligned with the observed index.
                weights = pd.Series(weights, index=observed.index,
                                    name='weights')
        self._weights = weights
        self._resid = resid
        self._observed = observed

    @property
    def observed(self):
        """Observed data"""
        return self._observed

    @property
    def seasonal(self):
        """The estimated seasonal component"""
        return self._seasonal

    @property
    def trend(self):
        """The estimated trend component"""
        return self._trend

    @property
    def resid(self):
        """The estimated residuals"""
        return self._resid

    @property
    def weights(self):
        """The weights used in the robust estimation"""
        return self._weights

    @property
    def nobs(self):
        """
        Shape of the observed data.

        This is the ``shape`` attribute of the observed data, not a scalar
        count; its first entry is the number of observations.
        """
        return self._observed.shape

    def plot(self, observed=True, seasonal=True, trend=True, resid=True,
             weights=False):
        """
        Plot estimated components

        Parameters
        ----------
        observed : bool
            Include the observed series in the plot
        seasonal : bool
            Include the seasonal component in the plot
        trend : bool
            Include the trend component in the plot
        resid : bool
            Include the residual in the plot
        weights : bool
            Include the weights in the plot (if any)

        Returns
        -------
        matplotlib.figure.Figure
            The figure instance containing the plot.
        """
        from statsmodels.graphics.utils import _import_mpl
        from pandas.plotting import register_matplotlib_converters
        plt = _import_mpl()
        register_matplotlib_converters()

        # One (data, default label) pair per requested panel, top to bottom.
        panels = [(self._observed, 'Observed')] if observed else []
        panels += [(self.trend, 'trend')] if trend else []
        panels += [(self.seasonal, 'seasonal')] if seasonal else []
        panels += [(self.resid, 'residual')] if resid else []
        panels += [(self.weights, 'weights')] if weights else []

        if isinstance(self._observed, (pd.DataFrame, pd.Series)):
            nobs = self._observed.shape[0]
            xlim = self._observed.index[0], self._observed.index[nobs - 1]
        else:
            xlim = (0, self._observed.shape[0] - 1)

        # squeeze=False always returns a 2-d array of Axes, so requesting
        # a single panel no longer yields a bare, non-iterable Axes.
        fig, axs = plt.subplots(len(panels), 1, squeeze=False)
        for i, (ax, (data, def_name)) in enumerate(zip(axs[:, 0], panels)):
            if def_name != 'residual':
                ax.plot(data)
            else:
                # Residuals are drawn as markers around a zero baseline.
                ax.plot(data, marker='o', linestyle='none')
                ax.plot(xlim, (0, 0), color='#000000', zorder=-3)
            name = getattr(data, 'name', def_name)
            if def_name != 'Observed':
                if name is None:
                    # Unnamed pandas objects fall back to the default label.
                    name = def_name
                name = str(name).capitalize()
            # The observed series, when shown first, labels the figure via
            # its title; every other panel is labelled on its y-axis.
            title = ax.set_title if i == 0 and observed else ax.set_ylabel
            title(name)
            ax.set_xlim(xlim)

        fig.tight_layout()
        return fig

305 return fig