---
title: Sample Session dataset
keywords: fastai
sidebar: home_sidebar
summary: "Small sample of session dataset."
description: "Small sample of session dataset."
nb_path: "nbs/datasets/datasets.sample_session.ipynb"
---
{% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}

class SampleDataset[source]

SampleDataset(root, column_names={'SESSION_ID': 'session_id', 'ITEM_ID': 'item_id', 'TIMEFRAME': 'timeframe', 'EVENT_DATE': 'eventdate'}) :: SessionDatasetv2

Small sample session dataset, built on the SessionDatasetv2 dataset base class.

{% endraw %} {% raw %}
{% endraw %} {% raw %}
ds = SampleDataset(root='/content/samplesession')
{% endraw %} {% raw %}
train_data = pickle.load(open('/content/samplesession/processed/train.txt', 'rb'))
train_data[0][:10]
[[1, 2], [1], [4], [6], [8, 9], [8], [10, 11, 11], [10, 11], [10], [12]]
{% endraw %} {% raw %}
len(train_data[0])
1205
{% endraw %} {% raw %}
train_data, valid_data = split_validation(train_data, valid_portion=0.1)
test_data = valid_data

train_data = base.GraphDataset(train_data, shuffle=True)
test_data = base.GraphDataset(test_data, shuffle=False)
{% endraw %} {% raw %}
train_data.generate_batch(10)[:5]
[array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19]),
 array([20, 21, 22, 23, 24, 25, 26, 27, 28, 29]),
 array([30, 31, 32, 33, 34, 35, 36, 37, 38, 39]),
 array([40, 41, 42, 43, 44, 45, 46, 47, 48, 49])]
{% endraw %} {% raw %}

class SampleDatasetv2[source]

SampleDatasetv2(root, shuffle=False, n_node=309, is_train=True) :: SessionGraphDataset

References

1. COTREC session-based recommender model training. https://t.ly/cXTH.
{% endraw %} {% raw %}
{% endraw %} {% raw %}
root = '/content/samplesessionv2'

train_data = SampleDatasetv2(root=root, shuffle=True, is_train=True)
test_data = SampleDatasetv2(root=root, shuffle=False, is_train=False)
Downloading https://github.com/RecoHut-Datasets/sample_session/raw/v2/all_train_seq.txt
Downloading https://github.com/RecoHut-Datasets/sample_session/raw/v2/train.txt
/usr/local/lib/python3.7/dist-packages/numpy/core/_asarray.py:83: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray
  return array(a, dtype, copy=False, order=order)
Using existing file all_train_seq.txt
Downloading https://github.com/RecoHut-Datasets/sample_session/raw/v2/test.txt
/usr/local/lib/python3.7/dist-packages/numpy/core/_asarray.py:83: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray
  return array(a, dtype, copy=False, order=order)
{% endraw %}