--- title: EPF dataset keywords: fastai sidebar: home_sidebar summary: "Download the EPF dataset." description: "Download the EPF dataset." nb_path: "nbs/data_datasets__epf.ipynb" ---
{% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}

class NP[source]

NP(test_date:str='2016-12-27', name:str='NP')

NP(test_date: str = '2016-12-27', name: str = 'NP')

{% endraw %} {% raw %}

class PJM[source]

PJM(test_date:str='2016-12-27', name:str='PJM')

PJM(test_date: str = '2016-12-27', name: str = 'PJM')

{% endraw %} {% raw %}

class BE[source]

BE(test_date:str='2015-01-04', name:str='BE')

BE(test_date: str = '2015-01-04', name: str = 'BE')

{% endraw %} {% raw %}

class FR[source]

FR(test_date:str='2015-01-04', name:str='FR')

FR(test_date: str = '2015-01-04', name: str = 'FR')

{% endraw %} {% raw %}

class DE[source]

DE(test_date:str='2016-01-04', name:str='DE')

DE(test_date: str = '2016-01-04', name: str = 'DE')

{% endraw %} {% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}

class EPF[source]

EPF()

{% endraw %} {% raw %}
{% endraw %} {% raw %}

epf_naive_forecast[source]

epf_naive_forecast(Y_df)

Function to build the naive forecast for electricity price forecasting

The function is used to compute the accuracy metrics MASE and RMAE. It assumes that there are 24 prices per day, and it computes a naive forecast for days of the week and a seasonal naive forecast for weekends.

Parameters

Y_df : pandas.DataFrame Dataframe containing the real prices in long format that contains variables ['ds', 'unique_id', 'y']

Returns

Y_hat_df : pandas.DataFrame Dataframe containing the predictions of the epf naive forecast.

{% endraw %} {% raw %}
{% endraw %}

Load specific group

{% raw %}
# Load a single EPF market and report how its rows split between the
# training window and the held-out tail.
args = pd.Series({'dataset': 'NP'})

Y_df, Xt_df, _ = EPF.load(directory='data', group=args.dataset)

# train_outsample_mask: 1 keeps a row for training, 0 masks it out.
# The last `offset` rows (365 * 24 * 2, i.e. two years if rows are hourly)
# are masked out and reported below as the validation window.
HOURS_PER_YEAR = 24 * 365
offset = 365 * 24 * 2
train_outsample_mask = np.ones(len(Y_df))
train_outsample_mask[-offset:] = 0

# Kept rows (mask == 1) are the training hours; masked rows (mask == 0) are
# the validation hours. These two counts must not be swapped in the report.
n_train_hours = np.sum(train_outsample_mask)
n_validation_hours = np.sum(1 - train_outsample_mask)

print(f'Dataset: {args.dataset}')
print(f'Train mask percentage: {np.round(n_train_hours/len(train_outsample_mask),2)}')
print('X: time series features, of shape (#hours, #times,#features): \t' + str(Xt_df.shape))
print('Y: target series (in X), of shape (#hours, #times): \t \t' + str(Y_df.shape))
print(f'Last ds {Y_df.ds.max()}')
print(f'Train {n_train_hours} hours = {np.round(n_train_hours/HOURS_PER_YEAR,2)} years')
print(f'Validation {n_validation_hours} hours = {np.round(n_validation_hours/HOURS_PER_YEAR,2)} years')
print('\n')
100%|██████████| 2.12M/2.12M [00:02<00:00, 927kiB/s] 
INFO:nixtla.data.datasets.utils:Successfully downloaded NP.csv, 2118914, bytes.
100%|██████████| 2.53M/2.53M [00:01<00:00, 2.08MiB/s]
INFO:nixtla.data.datasets.utils:Successfully downloaded PJM.csv, 2530311, bytes.
100%|██████████| 1.97M/1.97M [00:03<00:00, 567kiB/s]
INFO:nixtla.data.datasets.utils:Successfully downloaded BE.csv, 1974555, bytes.
100%|██████████| 2.00M/2.00M [00:01<00:00, 1.37MiB/s]
INFO:nixtla.data.datasets.utils:Successfully downloaded FR.csv, 1996871, bytes.
100%|██████████| 2.10M/2.10M [00:03<00:00, 681kiB/s]
INFO:nixtla.data.datasets.utils:Successfully downloaded DE.csv, 2095831, bytes.
Dataset: NP
Train mask percentage: 0.67
X: time series features, of shape (#hours, #times,#features): 	(52416, 12)
Y: target series (in X), of shape (#hours, #times): 	 	(52416, 3)
Last ds 2018-12-24 23:00:00
Train 17520.0 hours = 2.0 years
Validation 34896.0 hours = 3.98 years


{% endraw %}

Load all groups

{% raw %}
# Plotting setup shared by the figure cells below: imports, global style,
# font family and the base font size.
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

# NOTE: this second import rebinds `plt`; from here on `plt` refers to the
# pylab module rather than matplotlib.pyplot.
import pylab as plt
from pylab import rcParams
# NOTE(review): newer matplotlib releases renamed this style to
# 'seaborn-v0_8-whitegrid' — confirm against the matplotlib version in use.
plt.style.use('seaborn-whitegrid')

# Rebinds `rcParams`, which was already imported from pylab above.
from matplotlib import rcParams
plt.rcParams['font.family'] = 'serif'

# Base font size: the plotting cells use FONTSIZE for axis labels and
# FONTSIZE-2 for tick labels.
FONTSIZE = 22
{% endraw %} {% raw %}
from nixtlats.data.datasets.epf import EPF, EPFInfo

# Plot the price series of every EPF market on a 3x2 grid of axes and save
# the figure to ./results/market_plots.pdf.

# exist_ok=True replaces the race-prone "check, then create" sequence.
os.makedirs('./results/', exist_ok=True)

dataset = ['NP', 'PJM', 'BE', 'FR', 'DE']
Y_df, X_df, S_df = EPF.load_groups(directory='data', groups=dataset)

fig = plt.figure(figsize=(20, 15))
# Changes the default size of figures created after this point; it does not
# resize `fig`, whose size is fixed by the figsize argument above.
rcParams['figure.figsize'] = 15, 15

# One axes per market; the sixth cell of the 3x2 grid stays empty.
# y-limits are set per axes inside the loop: calling plt.ylim() here, before
# any axes exist, would create an extra implicit axes on the figure.
grid_positions = [(0, 0), (0, 1), (1, 0), (1, 1), (2, 0)]
axs = [plt.subplot2grid((3, 2), position) for position in grid_positions]

for ax, market in zip(axs, dataset):
    currency = 'USD' if market == 'PJM' else 'EUR'
    title_prefix = '' if market in ('PJM', 'NP') else 'EPEX-'
    title_str = f'{title_prefix}{market} market'
    y_axis_str = f'Price [{currency}/MWh]'

    # Filter the long-format frame once per market.
    market_df = Y_df[Y_df.unique_id == market]
    x_plot = market_df.ds.values
    y_plot = market_df.y.values

    # Only the first 10 characters (the date part) of each timestamp are shown.
    x_axis_str = f'Hours [{str(x_plot.min())[:10]}  to {str(x_plot.max())[:10]}]'

    ax.plot(x_plot, y_plot, color='#628793', linewidth=0.4)
    ax.tick_params(labelsize=FONTSIZE-2)
    ax.set_xlabel(x_axis_str, fontsize=FONTSIZE)
    # Dashed marker 728*24 rows before the end of the series.
    # NOTE(review): presumably the start of the test period — confirm.
    ax.vlines(x_plot[-728*24], -200, 800, linestyle=(0, (5, 10)),
              color='black', linewidth=1.)
    ax.set_ylabel(y_axis_str, fontsize=FONTSIZE)
    ax.set_title(title_str)
    ax.set_ylim(-200, 800)

plt.subplots_adjust(left=0.125, bottom=0.1, right=0.9, top=1.2, wspace=0.2, hspace=0.2)
plt.savefig('./results/market_plots.pdf', bbox_inches = 'tight')
plt.show()
{% endraw %} {% raw %}
from nixtlats.data.datasets.epf import EPF, EPFInfo

# Plot the price and the two exogenous series of a single market as three
# stacked panels and save the figure to ./results/NP.pdf.

# y-axis labels of the two exogenous series, per market.
EXOGENOUS_NAMES = {'NP': ['Load [GW]', 'Wind Generation [GW]'],
                   'PJM': ['Load [GW]', 'COMED Load [GW]'],
                   'BE': ['Load [GW]', 'Total France Generation [GW]'],
                   'FR': ['Load [GW]', 'Total France Generation [GW]'],
                   'DE': ['TSO Zonal Load [GW]', 'DE Wind Generation [GW]']}

# A single market is loaded; the marker heights and the output file name
# below are hard-coded for 'NP'.
dataset = ['NP']
Y_df, X_df, S_df = EPF.load_groups(directory='data', groups=dataset)

fig = plt.figure(figsize=(34, 12))
# Only the left column of the 3x2 grid is used. No plt.ylim() call here:
# before any axes exist it would create an extra implicit axes.
axs = [plt.subplot2grid((3, 2), (row, 0)) for row in range(3)]

market = dataset[0]
currency = 'USD' if market == 'PJM' else 'EUR'
y_axis_str = f'Price [{currency}/MWh]'

x_plot = Y_df.ds.values
x_plot_min = pd.to_datetime(x_plot.min()).strftime('%B %d, %Y')
x_plot_max = pd.to_datetime(x_plot.max()).strftime('%B %d, %Y')
x_axis_str = f'Hours [{x_plot_min}  to  {x_plot_max}]'

# Position of the dashed marker: 728*24 rows before the end of the series.
# NOTE(review): presumably the start of the test period — confirm.
split_idx = -728 * 24

y_plot = Y_df.y.values
x1_plot = X_df.Exogenous1.values
# Copy before editing: `.values` may expose X_df's own buffer (or a read-only
# array), and the NaN gap below must not be written into the DataFrame.
x2_plot = X_df.Exogenous2.values.copy()

# "Mini hack" from the original cell: blank 60 rows on each side of the
# marker so the plotted line has a gap around it.
x2_plot[split_idx - 60:split_idx + 60] = np.nan

# (series, y-label, (marker ymin, marker ymax), line width, alpha) per panel.
# The exogenous series are divided by 1000 before plotting.
panels = [
    (y_plot, y_axis_str, (0, 200), 0.4, 1.),
    (x1_plot / 1000, EXOGENOUS_NAMES[market][0], (25, 72), 0.37, 0.8),
    (x2_plot / 1000, EXOGENOUS_NAMES[market][1], (-.2, 5.2), 0.37, 0.8),
]

for ax, (series, y_label, (marker_min, marker_max), line_width, line_alpha) in zip(axs, panels):
    ax.plot(x_plot, series, color='#628793', linewidth=line_width, alpha=line_alpha)
    ax.vlines(x_plot[split_idx], marker_min, marker_max, linestyle=(0, (5, 10)),
              color='black', linewidth=1.9)
    ax.tick_params(labelsize=FONTSIZE-2)
    ax.set_xlabel(x_axis_str, fontsize=FONTSIZE)
    ax.set_ylabel(y_label, fontsize=FONTSIZE)

plt.subplots_adjust(left=0.125, bottom=0.1, right=0.9, top=1.5, wspace=0.2, hspace=0.2)
plt.savefig('./results/NP.pdf', bbox_inches = 'tight')
plt.show()
{% endraw %} {% raw %}
import pandas as pd

# Plot the price series of a single market with two dashed markers and save
# the figure to ./results/train_methodology.pdf.

# A single market is loaded; the marker heights below are hard-coded.
dataset = ['NP']
Y_df, X_df, S_df = EPF.load_groups(directory='data', groups=dataset)

fig = plt.figure(figsize=(15.5, 5))
# Create the axes explicitly. No plt.ylim() call before this point: with no
# axes on the figure it would create an extra implicit axes.
ax0 = plt.subplot2grid((1, 1), (0, 0))
axs = [ax0]

market = dataset[0]
currency = 'USD' if market == 'PJM' else 'EUR'
y_axis_str = f'Price [{currency}/MWh]'

x_plot = Y_df.ds.values
x_plot_min = pd.to_datetime(x_plot.min()).strftime('%B %d, %Y')
x_plot_max = pd.to_datetime(x_plot.max()).strftime('%B %d, %Y')
x_axis_str = f'Hours [{x_plot_min}  to {x_plot_max}]'

y_plot = Y_df.y.values

ax0.plot(x_plot, y_plot, color='#628793', linewidth=0.4)
ax0.tick_params(labelsize=FONTSIZE-2)
ax0.set_xlabel(x_axis_str, fontsize=FONTSIZE)
ax0.set_ylabel(y_axis_str, fontsize=FONTSIZE)

# Two dashed markers, counted in rows from the end of the series: one
# 728*24 rows before the end, and one a further 42*7*24 rows earlier.
# NOTE(review): presumably the test and validation starts — confirm.
marker_positions = [-(42*7*24) - (728*24), -728*24]
for marker_idx in marker_positions:
    ax0.vlines(x_plot[marker_idx], 0, 200, linestyle=(0, (5, 10)),
               color='black', linewidth=1.)

plt.savefig('./results/train_methodology.pdf', bbox_inches = 'tight')
plt.show()
{% endraw %}

VALIDATION OF EPF SEASONAL NAIVE FORECASTS

{% raw %}
# Visual check of the EPF naive forecast: overlay the 'y' and 'y_hat' columns
# for the first 24*7*3 rows (three weeks if rows are hourly).
Y_hat_df = epf_naive_forecast(Y_df)

n_rows_shown = 24*7*3
fig = plt.figure(figsize=(15.5, 5))
for column_name, legend_label in (('y', 'true'), ('y_hat', 'naive')):
    plt.plot(Y_hat_df[column_name][:n_rows_shown], label=legend_label)
plt.legend()
plt.show()
{% endraw %}