---
title: "ES-RNN: Exponential Smoothing Recurrent Neural Network"
keywords: fastai
sidebar: home_sidebar
summary: "API details."
description: "API details."
nb_path: "nbs/models_esrnn__esrnn.ipynb"
---

Test ES-RNN model

The test below checks that the seasonal exponential-smoothing component does not leak information from the forecast window: the last `output_size` observations are masked out, then replaced with extreme values, and the resulting forecasts must remain unchanged (up to numerical noise).

{% raw %}
import torch as t  # _ESRNN is defined earlier in this module


def test_no_leakage_season(n_series, input_size, output_size, seasonality):
    """
    Checks that the seasonality component does not leak information
    from the masked-out forecast window into the forecasts.
    """
    t.manual_seed(1)
    model = _ESRNN(n_series=n_series, input_size=input_size, output_size=output_size, 
                   output_size_m=1, n_t=0, n_s=0, 
                   es_component='multiplicative', seasonality=seasonality, noise_std=1e-9,
                   cell_type='GRU',
                   add_nl_layer=False, dilations=[[1, 2]], state_hsize=30)
    S = t.empty((n_series, 0))
    X = t.empty((n_series, 0))
    Y = t.normal(0, 1, (n_series, 2 * (input_size + output_size)))
    Y += Y.min().abs() + 10  # shift to strictly positive values (multiplicative model)
    sample_mask = t.ones_like(Y)
    sample_mask[:, -output_size:] = 0  # mask out the last output_size observations
    idxs = t.arange(n_series)
    
    # Replace the masked-out window with different extreme values;
    # forecasts should be the same in all cases, up to numerical noise.
    # Clones are required: `[Y] * 4` would alias the same tensor, so each
    # assignment below would overwrite the previous one.
    Y_to_test = [Y.clone() for _ in range(4)]
    Y_to_test[1][:, -output_size:] = 10_000
    Y_to_test[2][:, -output_size:] = 0
    Y_to_test[3][:, -output_size:] = -10_000
    
    forecasts = []
    for Y in Y_to_test:
        # forward pass: exponential smoothing, then the dilated RNN
        windows_y_insample, windows_y_outsample, levels, seasonalities, sample_mask_w = model.es(S, Y, X, idxs, sample_mask=sample_mask)
        trends = model.rnn(windows_y_insample)
        trends = trends.permute(1, 0, 2)

        # reassemble forecasts from trend, level and seasonality
        y_hat = model.es.predict(trends=trends, levels=levels, seasonalities=seasonalities)

        forecasts.append(y_hat)
        
    assert all(t.allclose(forecasts[0], forecast) for forecast in forecasts), \
        'Season leakage detected, please check.'
{% endraw %} {% raw %}
test_no_leakage_season(n_series=10, input_size=10, output_size=8, seasonality=[6])
{% endraw %} {% raw %}
test_no_leakage_season(n_series=10, input_size=10, output_size=2, seasonality=[6])
{% endraw %} {% raw %}
test_no_leakage_season(n_series=10, input_size=10, output_size=8, seasonality=[12, 24])
{% endraw %} {% raw %}
test_no_leakage_season(n_series=10, input_size=10, output_size=8, seasonality=[6, 12])
{% endraw %} {% raw %}
test_no_leakage_season(n_series=10, input_size=10, output_size=18, seasonality=[12])
{% endraw %} {% raw %}
test_no_leakage_season(n_series=10, input_size=10, output_size=4, seasonality=[1])
{% endraw %} {% raw %}
test_no_leakage_season(n_series=10, input_size=10, output_size=18, seasonality=[12, 24])
{% endraw %}

ES-RNN model wrapper

{% raw %}

class ESRNN[source]

ESRNN(n_series:int, n_x:int, n_s:int, input_size:int, output_size:int, sample_freq:int, es_component:str='multiplicative', cell_type:str='LSTM', state_hsize:int=50, dilations:List[List[int]]=[[1, 2], [4, 8]], add_nl_layer:bool=False, seasonality:List[int]=[], learning_rate:float=0.001, lr_scheduler_step_size:int=9, lr_decay:float=0.9, per_series_lr_multip:float=1.0, gradient_eps:float=1e-08, gradient_clipping_threshold:float=20.0, rnn_weight_decay:float=0.0, noise_std:float=0.001, level_variability_penalty:float=20.0, testing_percentile:Union[int, List[T]]=50, training_percentile:Union[int, List[T]]=50, loss:str='SMYL', val_loss:str='MAE') :: LightningModule

A PyTorch Lightning `LightningModule` implementing the ES-RNN of Smyl (2019): per-series multiplicative exponential smoothing (level and seasonality) combined with a shared dilated recurrent network.

{% endraw %}
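
The `es_component='multiplicative'` option follows the standard multiplicative Holt-Winters recursions. As a sketch of the usual formulation (per Smyl, 2019; the implementation's exact parametrization may differ):

$$
l_t = \alpha \, \frac{y_t}{s_t} + (1 - \alpha) \, l_{t-1},
\qquad
s_{t+m} = \beta \, \frac{y_t}{l_t} + (1 - \beta) \, s_t,
$$

where $l_t$ is the level, $s_t$ the seasonal factor with period $m$, and $\alpha, \beta$ are smoothing parameters learned per series. Input windows are deseasonalized and normalized as $y_t / (l_t s_t)$ before entering the RNN, and forecasts are reassembled as $\hat{y}_{t+h} = \hat{r}_{t+h} \, l_t \, s_{t+h}$ with $\hat{r}$ the RNN output; this mirrors the `model.es.predict(trends=..., levels=..., seasonalities=...)` step in the test above.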

ES-RNN Univariate Example

{% raw %}
import matplotlib.pyplot as plt
import pytorch_lightning as pl
import torch as t

from nixtlats.data.datasets.epf import EPF, EPFInfo
from nixtlats.data.tsdataset import TimeSeriesDataset
from nixtlats.data.tsloader import TimeSeriesLoader

from pytorch_lightning.callbacks import EarlyStopping
{% endraw %} {% raw %}
Y_df, X_df, _ = EPF.load(directory='./data', group=EPFInfo.groups[0])

X_df = X_df[['unique_id', 'ds', 'week_day']]

# Trimming series to avoid slow backprop through time
Y_df = Y_df.groupby('unique_id').tail(90*24+30*24)
X_df = X_df.groupby('unique_id').tail(90*24+30*24)

# Leveling Y_df: the multiplicative model requires strictly positive
# values, so shift the series up (the shift is undone after prediction)
Y_min = Y_df.y.min()
Y_df.y = Y_df.y - Y_min + 20

# Train split: the final 30 days are masked from training (is_test=False)
train_dataset = TimeSeriesDataset(Y_df=Y_df, X_df=X_df,
                                  ds_in_test=30*24,
                                  is_test=False,
                                  input_size=7*24,
                                  output_size=24,
                                  verbose=True)

# Validation split: the same final 30 days are exposed (is_test=True)
valid_dataset = TimeSeriesDataset(Y_df=Y_df, X_df=X_df,
                                  ds_in_test=30*24,
                                  is_test=True,
                                  input_size=7*24,
                                  output_size=24,
                                  verbose=True)

train_loader = TimeSeriesLoader(dataset=train_dataset,
                                num_workers=4,
                                batch_size=32,
                                shuffle=True)

valid_loader = TimeSeriesLoader(dataset=valid_dataset,
                                num_workers=4,
                                batch_size=1,
                                shuffle=False)
INFO:root:Train Validation splits

INFO:root:                              ds                    
                             min                 max
unique_id sample_mask                               
NP        0           2018-11-25 2018-12-24 23:00:00
          1           2018-08-27 2018-11-24 23:00:00
INFO:root:
Total data 			2880 time stamps 
Available percentage=100.0, 	2880 time stamps 
Insample  percentage=75.0, 	2160 time stamps 
Outsample percentage=25.0, 	720 time stamps 

INFO:root:Train Validation splits

INFO:root:                              ds                    
                             min                 max
unique_id sample_mask                               
NP        0           2018-08-27 2018-11-24 23:00:00
          1           2018-11-25 2018-12-24 23:00:00
INFO:root:
Total data 			2880 time stamps 
Available percentage=100.0, 	2880 time stamps 
Insample  percentage=25.0, 	720 time stamps 
Outsample percentage=75.0, 	2160 time stamps 

{% endraw %} {% raw %}
model = ESRNN(# Architecture parameters
              n_series=train_dataset.n_series,
              n_s=train_dataset.n_s,
              n_x=train_dataset.n_x,
              input_size=train_dataset.input_size,
              output_size=train_dataset.output_size,
              sample_freq=train_dataset.output_size,
              es_component='multiplicative',
              cell_type='LSTM',
              state_hsize=10,
              dilations=[[1, 24], [48, 168]],  # aligned with daily (24) and weekly (168) cycles of hourly data
              add_nl_layer=False,
              # Regularization and optimization parameters
              learning_rate=5e-3,
              lr_scheduler_step_size=100,
              lr_decay=0.9,
              per_series_lr_multip=1.5,
              gradient_eps=1e-8,
              gradient_clipping_threshold=10,
              rnn_weight_decay=0,
              noise_std=0.001,
              level_variability_penalty=10,
              testing_percentile=50,
              training_percentile=51,
              loss='SMYL',
              val_loss='MAE',
              seasonality=[24])
{% endraw %}
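
The `loss='SMYL'` objective combines a pinball (quantile) loss evaluated at `training_percentile` with a penalty on the variability of the learned levels, weighted by `level_variability_penalty`. A sketch of the usual form (per Smyl, 2019; normalization details may differ in the implementation):

$$
\mathcal{L} = \frac{1}{N} \sum_{t} \max\big( \tau (y_t - \hat{y}_t), \; (\tau - 1)(y_t - \hat{y}_t) \big) + \lambda \cdot \mathrm{LVP},
$$

with $\tau$ = `training_percentile` / 100 (51 here introduces a slight upward bias) and $\lambda$ = `level_variability_penalty`; LVP penalizes large period-to-period changes in the log-levels.

{% raw %}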
early_stopping = EarlyStopping(monitor="val_loss", 
                               min_delta=1e-4, 
                               patience=1, verbose=True, 
                               mode="min")

trainer = pl.Trainer(max_epochs=2, progress_bar_refresh_rate=1, 
                     log_every_n_steps=1, check_val_every_n_epoch=1,
                     callbacks=[early_stopping])
trainer.fit(model, train_loader, valid_loader)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

  | Name  | Type   | Params
---------------------------------
0 | model | _ESRNN | 17.8 K
---------------------------------
17.8 K    Trainable params
0         Non-trainable params
17.8 K    Total params
0.071     Total estimated model params size (MB)
Metric val_loss improved. New best score: 14.236
Metric val_loss improved by 1.447 >= min_delta = 0.0001. New best score: 12.790
{% endraw %} {% raw %}
outputs = trainer.predict(model, valid_loader)

y_true, y_hat, sample_mask = zip(*outputs)
y_true = t.cat(y_true).cpu()
y_hat = t.cat(y_hat).cpu()
sample_mask = t.cat(sample_mask).cpu()

print("Original")
print("y_true.shape", y_true.shape)
print("y_hat.shape", y_hat.shape)
y_true = y_true.flatten(1,2)
y_hat = y_hat.flatten(1,2)
sample_mask = sample_mask.flatten(1,2)

print("\nFlatten")
print("y_true.shape", y_true.shape)
print("y_hat.shape", y_hat.shape)
print("sample_mask.shape", sample_mask.shape)
Original
y_true.shape torch.Size([1, 113, 24])
y_hat.shape torch.Size([1, 113, 24])

Flatten
y_true.shape torch.Size([1, 2712])
y_hat.shape torch.Size([1, 2712])
sample_mask.shape torch.Size([1, 2712])
{% endraw %} {% raw %}
# Undo the leveling applied before training (inverse of `y - Y_min + 20`)
y_true = y_true + Y_min - 20
y_hat = y_hat + Y_min - 20
{% endraw %} {% raw %}
from scipy import stats

import pandas as pd

from nixtlats.losses.numpy import mae, rmae, smape, rmse
from nixtlats.data.datasets.epf import epf_naive_forecast

Y_naive_df = epf_naive_forecast(Y_df)

# Filter test hours
y_true = y_true[0, -30*24:]
y_hat = y_hat[0, -30*24:]
y_naive = Y_naive_df.y_hat.values[-30*24:]

metrics = pd.Series({'mae' :  mae(y=y_true, y_hat=y_hat),
                     'rmae':  rmae(y=y_true, y_hat1=y_hat, y_hat2=y_naive),
                     'smape': smape(y=y_true, y_hat=y_hat),
                     'rmse':  rmse(y=y_true, y_hat=y_hat)})

print(metrics)
print('\n')
print(stats.describe(y_true-y_hat))
print(f'model.training_percentile {model.training_percentile}')

plt.plot(sample_mask[0,:])
plt.show()
mae      13.490134
rmae      0.742696
smape    23.856249
rmse     17.098967
dtype: float64


DescribeResult(nobs=720, minmax=(-54.182884, 39.697495), mean=-3.9000943, variance=277.5494, skewness=-0.416260302066803, kurtosis=-0.3610564153515341)
model.training_percentile 51
{% endraw %} {% raw %}
start = 0
end = 7 * 24

fig = plt.figure(figsize=(15, 6))
plt.plot(y_true[start:end], color='#628793', linewidth=0.4, label='true')
plt.plot(y_hat[start:end], color='peru', linewidth=0.4, label='forecast')
plt.ylabel('Price [EUR/MWh]', fontsize=15)
plt.xlabel('Date', fontsize=15)
plt.legend()
plt.grid()
plt.show()
{% endraw %}

ES-RNN Multivariate Example

{% raw %}
from nixtlats.data.datasets.tourism import Tourism, TourismInfo

group = TourismInfo['Yearly']
print(group.name)
Y_df, *_ = Tourism.load(directory='./data', group=group.name)
Yearly
{% endraw %} {% raw %}
# Series that are too short: after reserving `horizon` test steps,
# fewer than 2 * horizon training observations remain
# (equivalently, total length < 3 * horizon + 1)
fill_uids = Y_df.groupby('unique_id').size().sort_values() \
                .add(-group.horizon-1) \
                .loc[lambda x: x < 2 * group.horizon] \
                .index
{% endraw %} {% raw %}
# Pad each short series by repeating its first observation backwards in time
new_ys = []
for uid in fill_uids:
    y = Y_df.query('unique_id == @uid')

    # observations needed to reach 3 * horizon + 1 in total
    ds_to_fill = 2 * group.horizon - y.shape[0] + group.horizon + 1

    # dates strictly before the first observed date
    ds = pd.date_range(end=y.iloc[0].ds, periods=ds_to_fill + 1, freq=group.freq)[:ds_to_fill]
    new_y = pd.DataFrame({'ds': ds})
    new_y['unique_id'] = y.iloc[0].unique_id
    new_y['y'] = y.iloc[0].y
    
    new_ys.append(new_y)
new_ys = pd.concat(new_ys)
{% endraw %} {% raw %}
Y_df = pd.concat([Y_df, new_ys]).sort_values(['unique_id', 'ds'], ignore_index=True)
{% endraw %}
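
A quick sanity check, under the same length criterion derived above (a sketch added here for illustration), confirms that every series now has enough history:

{% raw %}
# Hypothetical check: each series needs a test window plus at least one
# full input/output training window, i.e. 3 * horizon + 1 observations.
assert (Y_df.groupby('unique_id').size() >= 3 * group.horizon + 1).all()
{% endraw %}

{% raw %}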
train_dataset = TimeSeriesDataset(Y_df=Y_df,
                                  ds_in_test=group.horizon,
                                  is_test=False,
                                  input_size=group.horizon,
                                  output_size=group.horizon,
                                  verbose=True)

valid_dataset = TimeSeriesDataset(Y_df=Y_df,
                                  ds_in_test=0,
                                  is_test=True,
                                  input_size=group.horizon,
                                  output_size=group.horizon,
                                  verbose=True)
INFO:root:Train Validation splits

INFO:root:                    ds           
                   min        max
sample_mask                      
0           1990-12-31 2007-12-31
1           1960-12-31 2003-12-31
INFO:root:
Total data 			12708 time stamps 
Available percentage=100.0, 	12708 time stamps 
Insample  percentage=83.7, 	10636 time stamps 
Outsample percentage=16.3, 	2072 time stamps 

INFO:root:Train Validation splits

INFO:root:                    ds           
                   min        max
sample_mask                      
0           1960-12-31 2007-12-31
INFO:root:
Total data 			12708 time stamps 
Available percentage=100.0, 	12708 time stamps 
Insample  percentage=0.0, 	0 time stamps 
Outsample percentage=100.0, 	12708 time stamps 

{% endraw %} {% raw %}
train_loader = TimeSeriesLoader(dataset=train_dataset,
                                batch_size=32,
                                eq_batch_size=True,
                                shuffle=True)

valid_loader = TimeSeriesLoader(dataset=valid_dataset,
                                batch_size=1,
                                shuffle=False)
{% endraw %} {% raw %}
# Inspect one training batch: S holds static features, Y the target
# series, X the exogenous covariates (S and X are empty here)
dataloader = iter(train_loader)
batch = next(dataloader)
S, Y, X = batch['S'], batch['Y'], batch['X']
available_mask = batch['available_mask']
idxs = batch['idxs']

print("S.shape", S.shape)
print("Y.shape", Y.shape)
print("X.shape", X.shape)
print("idxs.shape", idxs.shape)
S.shape torch.Size([32, 0])
Y.shape torch.Size([32, 47])
X.shape torch.Size([32, 0, 47])
idxs.shape torch.Size([32])
{% endraw %} {% raw %}
model = ESRNN(# Architecture parameters
              n_series=train_dataset.n_series,
              n_s=train_dataset.n_s,
              n_x=train_dataset.n_x,
              input_size=train_dataset.input_size,
              output_size=train_dataset.output_size,
              sample_freq=1,
              # Regularization and optimization parameters
              learning_rate=5e-3,
              lr_scheduler_step_size=100,
              lr_decay=0.9,
              per_series_lr_multip=1.5,
              gradient_eps=1e-8,
              gradient_clipping_threshold=10,
              rnn_weight_decay=0,
              noise_std=0.001,
              level_variability_penalty=10,
              testing_percentile=50,
              training_percentile=51,
              # ES and RNN components
              es_component='multiplicative',
              cell_type='GRU',
              state_hsize=50,
              dilations=[[24, 48], [168]],
              add_nl_layer=False,
              loss='SMYL',
              val_loss='MAE',
              seasonality=[1])
{% endraw %} {% raw %}
early_stopping = EarlyStopping(monitor="val_loss", 
                               min_delta=1e-4, 
                               patience=1, verbose=True, 
                               mode="min")
{% endraw %} {% raw %}
trainer = pl.Trainer(max_epochs=2, progress_bar_refresh_rate=1, 
                     log_every_n_steps=1, check_val_every_n_epoch=1,
                     callbacks=[early_stopping])
trainer.fit(model, train_loader, valid_loader)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

  | Name  | Type   | Params
---------------------------------
0 | model | _ESRNN | 40.8 K
---------------------------------
40.8 K    Trainable params
0         Non-trainable params
40.8 K    Total params
0.163     Total estimated model params size (MB)
Metric val_loss improved. New best score: 68293.117
Monitored metric val_loss did not improve in the last 1 records. Best score: 68293.117. Signaling Trainer to stop.
{% endraw %} {% raw %}
outputs = trainer.predict(model, valid_loader)
y, y_hat, mask = zip(*outputs)
y = t.cat(y, axis=1)
y_hat = t.cat(y_hat, axis=1)
mask = t.cat(mask, axis=1)
print("y_true.shape", y.shape)
print("y_hat.shape", y_hat.shape)
print("mask.shape", mask.shape)
y_true.shape torch.Size([1, 9082, 4])
y_hat.shape torch.Size([1, 9082, 4])
mask.shape torch.Size([1, 9082, 4])
{% endraw %}
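
To score these forecasts, one can keep only the positions the loader marked as valid and reuse the `mae` helper imported in the univariate example; a minimal sketch, assuming the tensors produced above:

{% raw %}
# Boolean-index with the mask to drop invalid positions, then compute
# MAE with the numpy-based helper from nixtlats.losses.numpy.
y_valid = y[mask.bool()].numpy()
y_hat_valid = y_hat[mask.bool()].numpy()
print('MAE:', mae(y=y_valid, y_hat=y_hat_valid))
{% endraw %}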