---
title: AR
keywords: fastai
sidebar: home_sidebar
summary: "Association Rules"
description: "Association Rules"
nb_path: "nbs/models/models.ar.ipynb"
---
{% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}

class AssosiationRules[source]

AssosiationRules(pruning=10, session_key='SessionID', item_keys=['ItemID'])

AssosiationRules(pruning=10, session_key='SessionID', item_keys=['ItemID'])

Parameters

pruning : int Prune the results per item to a list of the top N co-occurrences. (Default value: 10)
session_key : string The data frame key for the session identifier. (Default value: 'SessionID')
item_keys : list of string The data frame keys for the items: the item identifier comes first in the list, followed by any feature keys. (Default value: ['ItemID'])

{% endraw %} {% raw %}
{% endraw %} {% raw %}
import os
import time
import argparse
import pandas as pd
from recohut.utils.common_utils import download_url
{% endraw %} {% raw %}
# Directory handed to download_url as the destination for both dataset files.
data_root = '/content/data'

# Yoochoose train / validation splits hosted in the RecoHut-Datasets repository.
train_url = 'https://github.com/RecoHut-Datasets/yoochoose/raw/v4/yoochoose_train.txt'
valid_url = 'https://github.com/RecoHut-Datasets/yoochoose/raw/v4/yoochoose_valid.txt'

download_url(train_url, data_root)
# Kept as the final bare expression so the notebook echoes its return value.
download_url(valid_url, data_root)
Downloading https://github.com/RecoHut-Datasets/yoochoose/raw/v4/yoochoose_train.txt
Downloading https://github.com/RecoHut-Datasets/yoochoose/raw/v4/yoochoose_valid.txt
'/content/data/yoochoose_valid.txt'
{% endraw %} {% raw %}
# Evaluation configuration. The options are declared argparse-style but are
# parsed from an empty list below, so the defaults always apply in this cell.
parser = argparse.ArgumentParser()
parser.add_argument('--prune', type=int, default=10, help="Association Rules Pruning Parameter")
parser.add_argument('--K', type=int, default=20, help="K items to be used in Recall@K and MRR@K")
parser.add_argument('--itemid', default='sid', type=str)
parser.add_argument('--sessionid', default='uid', type=str)
parser.add_argument('--item_feats', default='', type=str,
                    help="Names of Columns containing items features separated by #")
parser.add_argument('--valid_data', default='yoochoose_valid.txt', type=str)
parser.add_argument('--train_data', default='yoochoose_train.txt', type=str)
parser.add_argument('--data_folder', default=data_root, type=str)

# Get the arguments
args = parser.parse_args([])
train_data = os.path.join(args.data_folder, args.train_data)
x_train = pd.read_csv(train_data)
valid_data = os.path.join(args.data_folder, args.valid_data)
x_valid = pd.read_csv(valid_data)
# Bring each session's rows together. A stable sort is required: the default
# sort may reorder rows that share a session id, and the loop below depends on
# the within-session row order (presumably chronological in the file -- confirm).
x_valid.sort_values(args.sessionid, inplace=True, kind='mergesort')

# Item id column first, then any extra feature columns. Empty names (from an
# empty string or stray '#' separators) are dropped instead of being passed on.
items_feats = [args.itemid]
ffeats = [feat for feat in args.item_feats.strip().split("#") if feat]
items_feats.extend(ffeats)

print('Finished Reading Data.')
# Fitting AR Model
print('Start Model Fitting...')
t1 = time.time()
model = AssosiationRules(session_key=args.sessionid, item_keys=items_feats, pruning=args.prune)
model.fit(x_train)
t2 = time.time()
print('End Model Fitting with total time =', t2 - t1)

print('Start Predictions...')
# Test Set Evaluation
test_size = 0.0   # number of predictions that were scored
hit = 0.0         # predictions whose top-K list contained the true next item
MRR = 0.0         # running sum of reciprocal ranks (averaged at the end)
cur_session = -1
last_items = []   # distinct, model-known items seen so far in the current session
t1 = time.time()
index_item = x_valid.columns.get_loc(args.itemid)
index_session = x_valid.columns.get_loc(args.sessionid)
# Keys of the fitted model's item table; presumably the items seen in training.
train_items = model.items_features.keys()
counter = 0
for counter, row in enumerate(x_valid.itertuples(index=False), start=1):
    if counter % 5000 == 0:
        print('Finished Prediction for ', counter, 'items.')
    session_id, item_id = row[index_session], row[index_item]
    if session_id != cur_session:
        # New session: forget the history of the previous one.
        cur_session = session_id
        last_items = []

    # Ignore repeats within the session and items missing from the model.
    if item_id in last_items or item_id not in train_items:
        continue

    # The first item of a session only seeds the history; every later item is
    # a prediction target given the items before it.
    if last_items:
        test_size += 1
        # Predict the most similar items to items
        predictions = model.predict_next(last_items, k=args.K)
        # Evaluation: the first position at which the true item appears
        # determines both the hit and the reciprocal rank.
        for rank, predicted_item in enumerate(predictions, start=1):
            if predicted_item == item_id:
                hit += 1.0
                MRR += 1.0 / rank
                break

    last_items.append(item_id)
t2 = time.time()
# Guard against an empty evaluation set (no session with two usable items),
# which would otherwise raise ZeroDivisionError.
recall_at_k = hit / test_size if test_size else 0.0
mrr_at_k = MRR / test_size if test_size else 0.0
print('Recall: {}'.format(recall_at_k))
print ('\nMRR: {}'.format(mrr_at_k))
print('End Model Predictions with total time =', t2 - t1)
Finished Reading Data.
Start Model Fitting...
End Model Fitting with total time = 47.870760679244995
Start Predictions...
Finished Prediction for  5000 items.
Recall: 0.26574500768049153

MRR: 0.1308005998098165
End Model Predictions with total time = 110.56373572349548
{% endraw %}