---
title: Title
keywords: fastai
sidebar: home_sidebar
---
{% raw %}
{% endraw %} {% raw %}
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from htools import *
{% endraw %} {% raw %}
class SeqDataset(Dataset):
    """Minimal Dataset wrapping pre-tokenized sentences and their labels."""

    def __init__(self, sentence_tokens, labels):
        self.x = sentence_tokens
        self.y = labels

    def __getitem__(self, i):
        return self.x[i], self.y[i]

    def __len__(self):
        return len(self.x)
{% endraw %} {% raw %}
class PositionalAdjustment(nn.Module):
    """Learnable additive positional encoding: a (seq_len, emb_len) weight
    that is added to the input, broadcasting over the batch dimension.
    """

    def __init__(self, seq_len, emb_len):
        super().__init__()
        self.weight = nn.Parameter(torch.Tensor(seq_len, emb_len))
        self.weight.data.uniform_(-1, 1)

    def forward(self, x):
        return x + self.weight
{% endraw %} {% raw %}
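The weight has no batch dimension, so the addition in forward relies on broadcasting: every item in a (bs, seq_len, emb_len) batch gets the same learned shift. A quick sketch to confirm (pos_demo and fake_batch are throwaway names, not part of the notebook):

pos_demo = PositionalAdjustment(3, 5)              # seq_len=3, emb_len=5
fake_batch = torch.randn(2, 3, 5)                  # (bs, seq_len, emb_len)
pos_demo(fake_batch).shape                         # torch.Size([2, 3, 5])
# The (3, 5) weight broadcasts across the batch dimension.
torch.equal(pos_demo(fake_batch), fake_batch + pos_demo.weight)  # True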
pos = PositionalAdjustment(3, 5)
pos.weight
Parameter containing:
tensor([[-0.0878, -0.3883, -0.8043, -0.2112, -0.8069],
        [ 0.7833,  0.1276, -0.5188,  0.5671, -0.8165],
        [ 0.4665, -0.1780,  0.3808, -0.4395,  0.7865]], requires_grad=True)
{% endraw %} {% raw %}
class SeqNet(nn.Module):
    """Toy model: token embedding followed by an additive positional adjustment."""

    def __init__(self, vocab_size, emb_len, seq_len):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, emb_len)
        self.pos = PositionalAdjustment(seq_len, emb_len)
#         self.emb_t = nn.Embedding(seq_len, emb_len)
#         self.t_idx = torch.arange(0, seq_len, dtype=torch.long)

    def forward(self, x):
#         return self.emb(x) + self.emb_t(self.t_idx)
        x = self.emb(x)
        # Print the raw embeddings so the positional shift is visible in the output.
        print(x)
        return self.pos(x)
{% endraw %} {% raw %}
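The commented-out lines sketch an alternative: keep the positional offsets in a second nn.Embedding and look them up with a fixed index tensor. A hedged version of that idea, with the index registered as a buffer so it follows the module to the GPU (SeqNetEmbPos is an illustrative name only):

class SeqNetEmbPos(nn.Module):
    """Same idea as SeqNet, but the positional offsets live in an Embedding."""

    def __init__(self, vocab_size, emb_len, seq_len):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, emb_len)
        self.emb_t = nn.Embedding(seq_len, emb_len)
        # Positions 0..seq_len-1, stored as a buffer (moves with the module, not trained).
        self.register_buffer('t_idx', torch.arange(seq_len, dtype=torch.long))

    def forward(self, x):
        # (bs, seq_len, emb_len) + (seq_len, emb_len) broadcasts over the batch.
        return self.emb(x) + self.emb_t(self.t_idx)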
# Tiny hyperparameters for the toy example.
v_size = 5  # vocabulary size
e_len = 4   # embedding length
s_len = 3   # sequence length
bs = 2      # batch size
{% endraw %} {% raw %}
# 8 fake sentences, each a sequence of 3 token ids, with binary labels.
sent_tokens = torch.randint(0, 5, (8, 3))
labels = torch.randint(0, 2, size=(8,))
ds = SeqDataset(sent_tokens, labels)
dl = DataLoader(ds, batch_size=bs, shuffle=False)
x, y = next(iter(dl))
print(x, y)
tensor([[0, 3, 4],
        [4, 4, 1]]) tensor([0, 1])
{% endraw %} {% raw %}
net = SeqNet(v_size, e_len, s_len)
net
SeqNet(
  (emb): Embedding(5, 4)
  (pos): PositionalAdjustment()
)
{% endraw %} {% raw %}
net.emb.weight
Parameter containing:
tensor([[-0.2536,  1.8791,  2.1079,  1.7764],
        [-2.3785, -1.0723,  1.1331, -0.1812],
        [-0.7185, -0.6620, -0.3901,  2.9127],
        [-2.1197,  0.3470, -0.3491,  0.1351],
        [-0.3396, -0.5042, -1.4771,  0.5427]], requires_grad=True)
{% endraw %} {% raw %}
net.pos.weight
Parameter containing:
tensor([[ 0.4808, -0.4810, -0.3525,  0.1046],
        [ 0.9008, -0.3267, -0.8862, -0.6773],
        [ 0.0175,  0.0239,  0.3719, -0.9512]], requires_grad=True)
{% endraw %} {% raw %}
yhat = net(x)
yhat.shape
tensor([[[-0.2536,  1.8791,  2.1079,  1.7764],
         [-2.1197,  0.3470, -0.3491,  0.1351],
         [-0.3396, -0.5042, -1.4771,  0.5427]],

        [[-0.3396, -0.5042, -1.4771,  0.5427],
         [-0.3396, -0.5042, -1.4771,  0.5427],
         [-2.3785, -1.0723,  1.1331, -0.1812]]], grad_fn=<EmbeddingBackward>)
torch.Size([2, 3, 4])
{% endraw %} {% raw %}
yhat
tensor([[[ 0.2272,  1.3980,  1.7555,  1.8809],
         [-1.2188,  0.0203, -1.2352, -0.5422],
         [-0.3221, -0.4803, -1.1052, -0.4085]],

        [[ 0.1411, -0.9852, -1.8295,  0.6472],
         [ 0.5612, -0.8309, -2.3632, -0.1347],
         [-2.3610, -1.0484,  1.5050, -1.1324]]], grad_fn=<AddBackward0>)
{% endraw %} {% raw %}
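Sanity check: the output above should just be the embedded tokens plus the positional weight, broadcast over the batch.

torch.allclose(yhat, net.emb(x) + net.pos.weight)  # True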
fc = nn.Linear(4, 1)
fc.weight
Parameter containing:
tensor([[ 0.3051, -0.2961,  0.4608, -0.0259]], requires_grad=True)
{% endraw %} {% raw %}
# First row of item 1 in the batch dotted with the weight matrix, plus the bias
# (values typed in by hand).
-1.2705*.3051 -.2961*4.62 +.4608*-.6619 +.5565*.0259 + fc.bias
tensor([-2.1298], grad_fn=<AddBackward0>)
{% endraw %} {% raw %}
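The same check without hand-typed values: dot the first token's activations with the first (and only) weight row, then add the bias.

yhat[0, 0] @ fc.weight[0] + fc.bias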
yhat @ fc.weight.t() + fc.bias
tensor([[[-2.1299],
         [ 1.0206],
         [-0.1805]],

        [[-1.0462],
         [-0.2085],
         [-1.2599]]], grad_fn=<AddBackward0>)
{% endraw %} {% raw %}
fc(yhat)
tensor([[[-2.1299],
         [ 1.0206],
         [-0.1805]],

        [[-1.0462],
         [-0.2085],
         [-1.2599]]], grad_fn=<AddBackward0>)
{% endraw %} {% raw %}
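They agree because nn.Linear computes x @ W.T + b; a one-line confirmation:

torch.allclose(fc(yhat), yhat @ fc.weight.t() + fc.bias)  # True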
class Dropin(nn.Module):
    """Think about if this would need to work differently in training vs.
    eval mode, like multiplicative dropout.
    
    Work in progress, not sure if xavier normal is a good choice - just an 
    example.
    
    Also look into if floating point addition might be faster/slower on gpu
    than multiplication.
    """
    
    def __init__(self, *dims):
        super().__init__()
        self.weight = nn.Parameter(torch.Tensor(*dims))
        nn.init.xavier_normal_(self.weight.data)
        
    def forward(self, x):
        return x + self.weight
{% endraw %} {% raw %}
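One way to handle the training-vs.-eval question raised in the docstring is to gate the addition on self.training, the same flag nn.Dropout consults. A rough sketch under that assumption (DropinTrainOnly is a hypothetical name, not part of the notebook):

class DropinTrainOnly(nn.Module):
    """Additive perturbation that is skipped in eval mode."""

    def __init__(self, *dims):
        super().__init__()
        self.weight = nn.Parameter(torch.Tensor(*dims))
        nn.init.xavier_normal_(self.weight.data)

    def forward(self, x):
        # self.training is toggled by module.train() / module.eval().
        return x + self.weight if self.training else x

Switching the module to eval mode with .eval() then turns the perturbation off entirely, mirroring how dropout behaves at inference time.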
drop = Dropin(*yhat.shape)
drop.weight
Parameter containing:
tensor([[[ 0.2117,  0.6571, -1.0676, -0.6031],
         [ 0.7047,  0.7008,  0.2394, -0.0974],
         [-0.0186,  0.0943,  0.0224,  0.4781]],

        [[-0.1908, -0.3783, -0.1678, -0.6919],
         [ 0.4274, -0.0934,  0.2975, -0.4185],
         [-0.0236, -0.0022,  0.5413,  0.0780]]], requires_grad=True)
{% endraw %} {% raw %}
yhat
tensor([[[ 0.2272,  1.3980,  1.7555,  1.8809],
         [-1.2188,  0.0203, -1.2352, -0.5422],
         [-0.3221, -0.4803, -1.1052, -0.4085]],

        [[ 0.1411, -0.9852, -1.8295,  0.6472],
         [ 0.5612, -0.8309, -2.3632, -0.1347],
         [-2.3610, -1.0484,  1.5050, -1.1324]]], grad_fn=<AddBackward0>)
{% endraw %} {% raw %}
drop(yhat)
tensor([[[ 0.4389,  2.0551,  0.6879,  1.2779],
         [-0.5141,  0.7211, -0.9958, -0.6397],
         [-0.3407, -0.3860, -1.0828,  0.0695]],

        [[-0.0497, -1.3636, -1.9973, -0.0447],
         [ 0.9886, -0.9243, -2.0657, -0.5532],
         [-2.3846, -1.0506,  2.0464, -1.0544]]], grad_fn=<AddBackward0>)
{% endraw %} {% raw %}
torch.corr?
Object `torch.corr` not found.
{% endraw %} {% raw %}
np.corrcoef(yhat.detach().numpy().flatten(),
            drop(yhat).detach().numpy().flatten())
array([[1.        , 0.92782724],
       [0.92782724, 1.        ]])
{% endraw %} {% raw %}
np.corrcoef(drop.weight.detach().numpy().flatten(),
            drop(yhat).detach().numpy().flatten())
array([[1.        , 0.17803766],
       [0.17803766, 1.        ]])
{% endraw %}
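For comparison, the same correlation check against standard multiplicative dropout; the exact numbers vary from run to run because the dropout mask is random.

mult_drop = nn.Dropout(p=0.5)  # multiplicative dropout, in training mode by default
np.corrcoef(yhat.detach().numpy().flatten(),
            mult_drop(yhat).detach().numpy().flatten())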