---
title: Prod2Vec
keywords: fastai
sidebar: home_sidebar
summary: "Implementation of Prod2vec model."
description: "Implementation of Prod2vec model."
nb_path: "nbs/models/models.prod2vec.ipynb"
---
import pandas as pd
!wget -q --show-progress https://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx
df = pd.read_excel('Online Retail.xlsx')
df.dropna(inplace=True)
# Cast the product codes to strings.
df['StockCode'] = df['StockCode'].astype(str)

# Distinct customer IDs, put into random order. No seed is set in this part
# of the script, so the split below may differ from run to run.
import random

customers = df["CustomerID"].unique().tolist()
random.shuffle(customers)

# The first 90% of the shuffled IDs become the training customers.
customers_train = customers[:round(0.9 * len(customers))]

# Split the rows by customer. The membership mask is computed once and used
# together with its negation, so the two frames partition df exactly.
is_train_row = df['CustomerID'].isin(customers_train)
train_df = df[is_train_row]
validation_df = df[~is_train_row]
from tqdm.notebook import tqdm

# Group each split once so that a customer's purchase history is a single
# dictionary lookup rather than a full boolean scan of the frame per customer.
# Rows keep their original order inside each group.
train_history = train_df.groupby("CustomerID")["StockCode"].apply(list).to_dict()
val_history = validation_df.groupby("CustomerID")["StockCode"].apply(list).to_dict()

# One list of product codes per training customer, ordered like customers_train.
purchases_train = [train_history[customer] for customer in tqdm(customers_train)]

# One list of product codes per validation customer, ordered by the
# customer's first appearance in validation_df.
purchases_val = [
    val_history[customer]
    for customer in tqdm(validation_df['CustomerID'].unique())
]
# Fit a Prod2Vec model on the per-customer purchase sequences of the
# training split.
model = Prod2Vec(
    window=10,
    negative=5,
    size=100,
    min_count=2,
)
model.fit(purchases_train)
import warnings

warnings.filterwarnings('ignore')

# Product catalogue of the training split: one row per StockCode, keeping the
# description from its last occurrence. Deduplicating into a new frame
# (instead of in place on a slice of train_df) avoids pandas'
# SettingWithCopyWarning.
products = train_df[["StockCode", "Description"]].drop_duplicates(
    subset='StockCode', keep="last"
)

# Map each product code to a one-element list holding its description.
products_dict = products.groupby('StockCode')['Description'].apply(list).to_dict()


def _show_recommendations(n_products):
    """Sample products, then print them and the model's recommendations.

    Args:
        n_products: number of catalogue rows to sample as the user profile.

    Returns:
        Tuple ``(sample, recs)``: the sampled ``[StockCode, Description]``
        rows as an array, and a list of ``[description, b]`` pairs, where
        ``b`` is the second element of each pair yielded by
        ``model.recommend`` (presumably a similarity score -- confirm
        against the Prod2Vec implementation).
    """
    sample = products.sample(n_products).values
    codes, descriptions = sample[:, 0], sample[:, 1]
    # Query the model first so nothing is printed if the lookup fails.
    recs = [
        [products_dict[code][0], score]
        for code, score in model.recommend(user_profile=codes)
    ]
    print(descriptions)
    print(' ')
    for rec in recs:
        print(rec)
    return sample, recs


# Recommendations for a single randomly chosen product ...
random_sample, recommendations = _show_recommendations(1)
# ... and for a random profile of five products.
random_sample, recommendations = _show_recommendations(5)