--- title: SR keywords: fastai sidebar: home_sidebar summary: "Sequential Rules" description: "Sequential Rules" nb_path: "nbs/models/models.sr.ipynb" ---
{% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}

class SequentialRules[source]

SequentialRules(steps=10, weighting='div', pruning=20, session_key='SessionID', item_keys=['ItemId'])

SequentialRules(steps=10, weighting='div', pruning=20, session_key='SessionID', item_keys=['ItemId'])

Parameters

session_key : string The data frame key for the session identifier. (Default value: 'SessionID') item_keys : list of string The data frame keys to use: the item identifier key first, followed by any item feature keys. (Default value: ['ItemId']) steps : int Number of steps to walk back from the currently viewed item. (Default value: 10) weighting : string Weighting function for the previous items (linear, same, div, log, quadratic). (Default value: div) pruning : int Prune the results per item to a list of the top N sequential co-occurrences. (Default value: 20)

{% endraw %} {% raw %}
{% endraw %} {% raw %}
import os
import time
import argparse
import pandas as pd
from recohut.utils.common_utils import download_url
{% endraw %} {% raw %}
# Local directory that the dataset files are downloaded into; also used below
# as the default for the --data_folder argument.
data_root = '/content/data'
# Fetch the yoochoose train and validation splits from the RecoHut-Datasets
# repository (v4 tag). The last call is left as a bare expression so the
# notebook displays its return value (presumably the saved file path).
download_url('https://github.com/RecoHut-Datasets/yoochoose/raw/v4/yoochoose_train.txt', data_root)
download_url('https://github.com/RecoHut-Datasets/yoochoose/raw/v4/yoochoose_valid.txt', data_root)
Downloading https://github.com/RecoHut-Datasets/yoochoose/raw/v4/yoochoose_train.txt
Downloading https://github.com/RecoHut-Datasets/yoochoose/raw/v4/yoochoose_valid.txt
'/content/data/yoochoose_valid.txt'
{% endraw %} {% raw %}
# End-to-end example: fit a SequentialRules model on the training split and
# report Recall@K / MRR@K for next-item prediction on the validation split.
parser = argparse.ArgumentParser()
parser.add_argument('--prune', type=int, default=0, help="Association Rules Pruning Parameter")
parser.add_argument('--K', type=int, default=20, help="K items to be used in Recall@K and MRR@K")
parser.add_argument('--steps', type=int, default=10, help="Max Number of steps to walk back from the currently viewed item")
parser.add_argument('--weighting', type=str, default='div', help="Weighting function for the previous items (linear, same, div, log, qudratic)")
parser.add_argument('--itemid', default='sid', type=str)
parser.add_argument('--sessionid', default='uid', type=str)
parser.add_argument('--item_feats', default='', type=str, 
                    help="Names of Columns containing items features separated by #")
parser.add_argument('--valid_data', default='yoochoose_valid.txt', type=str)
parser.add_argument('--train_data', default='yoochoose_train.txt', type=str)
parser.add_argument('--data_folder', default=data_root, type=str)

# Parse an explicit empty argv so the defaults above are always used; inside a
# notebook, sys.argv belongs to the kernel and must not be parsed.
args = parser.parse_args([])
train_data = os.path.join(args.data_folder, args.train_data)
x_train = pd.read_csv(train_data)
valid_data = os.path.join(args.data_folder, args.valid_data)
x_valid = pd.read_csv(valid_data)
# Group the rows of each session together. The sort MUST be stable: the
# default quicksort may reorder rows that share a session id, which would
# scramble the click order that the sequential evaluation below relies on.
x_valid.sort_values(args.sessionid, kind='mergesort', inplace=True)

# The item id column always comes first; optional feature columns follow.
items_feats = [args.itemid]
ffeats = args.item_feats.strip().split("#")
if ffeats[0] != '':
    items_feats.extend(ffeats)

print('Finished Reading Data \nStart Model Fitting...')
# Fitting the Sequential Rules (SR) model
t1 = time.time()
model = SequentialRules(session_key = args.sessionid, item_keys = items_feats, 
                        pruning=args.prune, steps=args.steps, weighting=args.weighting)
model.fit(x_train)
t2 = time.time()
print('End Model Fitting with total time =', t2 - t1, '\n Start Predictions...')

# Test Set Evaluation
test_size = 0.0   # number of evaluated (history -> next item) predictions
hit = 0.0         # predictions whose target appeared in the returned list
MRR = 0.0         # running sum of reciprocal ranks of the hits
cur_length = 0    # predictions made so far in the current session
cur_session = -1
last_items = []   # items of the current session seen so far (the history)
t1 = time.time()
index_item = x_valid.columns.get_loc(args.itemid)
index_session = x_valid.columns.get_loc(args.sessionid)
train_items = model.items_features.keys()
counter = 0
for counter, row in enumerate(x_valid.itertuples(index=False), start=1):
    if counter % 5000 == 0:
        print('Finished Prediction for ', counter, 'items.')
    session_id, item_id = row[index_session], row[index_item]
    if session_id != cur_session:
        # A new session starts: reset the per-session state.
        cur_session = session_id
        last_items = []
        cur_length = 0

    # Items repeated within the session, or unknown to the trained model, are
    # skipped entirely: they are neither predicted nor added to the history.
    if item_id not in last_items and item_id in train_items:
        # False only for the first kept item of a session (empty history);
        # that item is just recorded as history.
        if len(last_items) > cur_length: #make prediction
            cur_length += 1
            test_size += 1
            # Predict the most similar items to items
            predictions = model.predict_next(last_items, k = args.K)
            # Evaluation: a hit counts once, weighted by 1/rank for MRR.
            for rank, predicted_item in enumerate(predictions, start=1):
                if predicted_item == item_id:
                    hit += 1.0
                    MRR += 1 / rank
                    break

        last_items.append(item_id)
t2 = time.time()
if test_size:
    recall = hit / test_size
    mrr = MRR / test_size
else:
    # No evaluable event (e.g. empty validation set or no session with two
    # known items): the metrics are undefined, so report NaN instead of
    # crashing with ZeroDivisionError.
    recall = mrr = float('nan')
print('Recall: {}'.format(recall))
print ('\nMRR: {}'.format(mrr))
print('End Model Predictions with total time =', t2 - t1)
Finished Reading Data 
Start Model Fitting...
End Model Fitting with total time = 23.63178014755249 
 Start Predictions...
Finished Prediction for  5000 items.
Recall: 0.44143625192012287

MRR: 0.16021305829773633
End Model Predictions with total time = 122.83067488670349
{% endraw %}