---
title: Criteo
keywords: fastai
sidebar: home_sidebar
summary: "Criteo dataset."
description: "Criteo dataset."
nb_path: "nbs/datasets/datasets.criteo.ipynb"
---
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from sklearn.metrics import log_loss, roc_auc_score
from recohut.models.dcn import DCNv2 as DCN
def get_auc(loader, model):
    """Return the ROC AUC of `model` over every batch yielded by `loader`.

    The model is switched to eval mode (and is not switched back here), then
    run batch by batch with gradient tracking disabled. Outputs and labels are
    collected across all batches and scored once with ``roc_auc_score``.

    Tensors are moved to the module-level ``device`` and cast to float before
    the forward pass.
    """
    scores, labels = [], []
    model.eval()
    with torch.no_grad():
        for features, truth in loader:
            features = features.to(device).float()
            truth = truth.to(device).float()
            outputs = model(features)
            # Extend row by row so the whole dataset is scored in a single call.
            scores.extend(outputs.cpu().numpy())
            labels.extend(truth.cpu().numpy())
    return roc_auc_score(labels, scores)
# ---- Run configuration ----
root = '/content/data'  # passed to CriteoSampleDataset(root=...)
device = 'cpu'          # target of every .to(device) call
seed = 2022             # seed value for the run — NOTE(review): confirm it is actually applied

# ---- Optimisation ----
epoches = 20            # number of passes over the training loader
batch_size = 1024       # batch size for both the train and test DataLoader
lr, wd = 1e-2, 1e-3     # Adam learning rate and weight_decay

# ---- Model ----
embedding_size = 4      # second positional argument to the DCN constructor
# Apply the configured seed before anything stochastic is created. `seed` was
# declared but never used, so weight initialisation and the shuffled batch
# order differed on every run.
# NOTE(review): whether ds.load() draws from these global RNGs is not visible
# here — confirm if the train/test split must also be reproducible.
np.random.seed(seed)
torch.manual_seed(seed)

# Load the dataset and wrap both splits in loaders; only the training split
# is shuffled.
ds = CriteoSampleDataset(root=root)
train_tensor_data, test_tensor_data = ds.load()
train_loader = DataLoader(train_tensor_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_tensor_data, batch_size=batch_size)

# Build the model from the feature metadata exposed by the dataset object.
model = DCN(ds.feat_sizes, embedding_size, ds.linear_feature_columns, ds.dnn_feature_columns).to(device)

# BCELoss takes probabilities, not logits — assumes the model's output is
# already in [0, 1]; TODO confirm against the DCN implementation.
loss_func = nn.BCELoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
# Train for `epoches` passes over train_loader. After each pass, report the
# mean per-batch training loss and the ROC AUC on test_loader.
for epoch in range(epoches):
    model.train()  # get_auc() below leaves the model in eval mode
    loss_sum, batch_count = 0.0, 0
    for inputs, labels in train_loader:
        inputs = inputs.to(device).float()
        labels = labels.to(device).float()

        # Clear gradients left over from the previous step, then take one
        # optimisation step on this batch.
        optimizer.zero_grad()
        batch_loss = loss_func(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        batch_count += 1

    auc = get_auc(test_loader, model)
    mean_loss = loss_sum / batch_count
    print('epoch/epoches: {}/{}, train loss: {:.3f}, test auc: {:.3f}'.format(epoch, epoches, mean_loss, auc))
# IPython line magics (they run only inside IPython/Jupyter, not as plain
# Python): load the `watermark` extension, then invoke it with the author
# string "Sparsh A." and `recohut` given to -p.
# NOTE(review): the remaining flags select what environment info is printed —
# see the watermark documentation for their exact meaning.
%reload_ext watermark
%watermark -a "Sparsh A." -m -iv -u -t -d -p recohut