---
title: Title
keywords: fastai
sidebar: home_sidebar
---
%load_ext autoreload
%autoreload 2
from collections import defaultdict
from functools import partial
import gc
from operator import add
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from htools import InvalidArgumentError
from incendio.core import BaseModel
from incendio.layers import JRelu, GRelu, mish
Maybe skip connections could be useful for tabular data? Apparently they're usually not helpful, though they might be when the network is very deep. A similar idea is used here: https://arxiv.org/abs/1708.05123
# class DenseLinear(BaseModel):
#
#     def __init__(self, x_dim, hidden_dim, activation=mish, skip_size=2):
#         super().__init__()
#         assert hidden_dim >= 2 ** (skip_size-1), ('Increase hidden dimension '
#                                                   'or decrease skip size.')
#         self.activation = activation
#         self.skip_size = skip_size
#         self.layers = nn.ModuleList([nn.Linear(x_dim, hidden_dim)])
#         prev_dim = hidden_dim
#         for i in range(1, skip_size):
#             new_dim = prev_dim // 2
#             new_layer = nn.Linear(prev_dim, new_dim)
#             self.layers.append(new_layer)
#             prev_dim = new_dim
#
#     def forward(self, x):
#         out = x
#         for i, layer in enumerate(self.layers):
#             out = layer(out)
#             if i != self.skip_size - 1:
#                 out = self.activation(out)
#         return self.activation(torch.cat((x, out), dim=1))
class DenseLinear(BaseModel):

    def __init__(self, x_dim, layer_dims, activation=mish):
        super().__init__()
        self.skip_size = len(layer_dims)
        self.activation = activation
        self.layers = nn.ModuleList([nn.Linear(d_in, d_out) for d_in, d_out
                                     in zip([x_dim]+list(layer_dims), layer_dims)])

    def forward(self, x):
        out = x
        for i, layer in enumerate(self.layers, 1):
            out = layer(out)
            if i < self.skip_size: out = self.activation(out)
        return self.activation(torch.cat((x, out), dim=1))
def concat(*args, dim=-1):
    return torch.cat(args, dim=dim)
class LinearSkipBlock(nn.Module):

    def __init__(self, x_dim, layer_dims, op, activation=mish):
        super().__init__()
        self.skip_size = len(layer_dims)
        self.activation = activation
        self.layers = nn.ModuleList([nn.Linear(d_in, d_out) for d_in, d_out
                                     in zip([x_dim]+list(layer_dims), layer_dims)])
        self.op = op

    def forward(self, x):
        out = x
        for i, layer in enumerate(self.layers, 1):
            out = layer(out)
            if i < self.skip_size: out = self.activation(out)
        print(self.op(x, out))
        return self.activation(self.op(x, out))
# class ResLinear(BaseModel):
#
#     def __init__(self, x_dim, hidden_dims, activation=JRelu):
#         super().__init__()
#         self.skip_size = len(hidden_dims)
#         self.activation = activation
#         self.layers = nn.ModuleList([
#             nn.Linear(in_dim, out_dim) for in_dim, out_dim
#             in zip([x_dim] + list(hidden_dims), list(hidden_dims) + [x_dim])
#         ])
#
#     def forward(self, x):
#         out = x
#         for i, layer in enumerate(self.layers):
#             out = layer(out)
#             if i != self.skip_size - 1:
#                 out = self.activation(out)
#         return self.activation(x + out)
class LinearResBlock(LinearSkipBlock):

    def __init__(self, x_dim, hidden_dims, activation=mish):
        if hidden_dims[-1] != x_dim:
            raise InvalidArgumentError(
                'Last hidden dimension must match input dimension.'
            )
        super().__init__(x_dim, hidden_dims, add, activation)
class LinearDenseBlock(LinearSkipBlock):

    def __init__(self, x_dim, hidden_dims, activation=mish):
        super().__init__(x_dim, hidden_dims, concat, activation)
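Since LinearSkipBlock takes the merge operation as an argument, other strategies can be plugged in without writing a new subclass. A quick hypothetical sketch (not one of the classes above): a block that averages the input with the last layer's output, which requires the final layer dimension to match x_dim.
# Hypothetical example: elementwise average of input and output as the skip op.
avg_block = LinearSkipBlock(6, [16, 6], op=lambda a, b: (a + b) / 2)
avg_block(torch.randn(4, 6)).shape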
x = torch.tensor([3, 5, 1])
i = [1]*2
x.view(-1, *i)
x.ndim
def weighted_avg(*args, weights):
    weights = torch.tensor(weights)
    total = weights.sum().float()
    if total != 1: weights = weights / total
    res = torch.stack(args)
    # Reshape weights to (n, 1, 1, ...) so they broadcast across each stacked tensor.
    weights_shape = [-1] + [1] * (res.ndim - 1)
    # Weights are normalized to sum to 1, so a sum gives the weighted average.
    return (res * weights.view(*weights_shape)).sum(dim=0)
t1 = torch.arange(5).float()
t2 = torch.ones(5).float()
t3 = torch.randn(5).float()
print(t1, t2, t3)
weighted_avg(t1, t2, t3, weights=[3, 5, 1])
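Sanity check: with weights [3, 5, 1] normalized to sum to 1, the result should match the explicit convex combination.
# Manual weighted average for comparison.
manual = (3*t1 + 5*t2 + 1*t3) / 9
print(manual)
torch.allclose(weighted_avg(t1, t2, t3, weights=[3, 5, 1]), manual)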
t1 = torch.arange(6).view(3, 2).float()
t2 = torch.ones(3, 2).float()
t3 = torch.randn(3, 2).float()
print(t1, t2, t3, sep='\n')
weighted_avg(t1, t2, t3, weights=[3, 5, 1])
torch.tensor([[0.3333],
              [0.5556],
              [0.1111]]).shape
# * torch.tensor([[[ 0.0000,  1.0000],
#                  [ 2.0000,  3.0000],
#                  [ 4.0000,  5.0000]],
#                 [[ 1.0000,  1.0000],
#                  [ 1.0000,  1.0000],
#                  [ 1.0000,  1.0000]],
#                 [[-1.5408, -0.7166],
#                  [ 0.6646,  0.2769],
#                  [-0.3888,  0.1351]]])
bs = 4
feature_dim = 6
hidden_dim = 16
model = LinearDenseBlock(feature_dim, [hidden_dim, 9, 3, 5])
x = torch.randint(5, size=(bs, feature_dim)).float()
x
model
out = model(x)
print(out.shape)
out
model.trainable()
model.dims()
x
bs = 4
feature_dim = 6
hidden_dim = 16
model = LinearResBlock(feature_dim, [hidden_dim, 12, feature_dim])
x = torch.randint(5, size=(bs, feature_dim)).float()
x
model
model(x).shape
bs = 4
feature_dim = 6
hidden_dim = 16
model = LinearResBlock(feature_dim, [hidden_dim, hidden_dim//4, feature_dim])
x = torch.randint(5, size=(bs, feature_dim)).float()
x
model
model(x).shape
bs = 4
feature_dim = 6
hidden_dim = 16
model = LinearResBlock(feature_dim, [feature_dim])
x = torch.randint(5, size=(bs, feature_dim)).float()
print(x)
model
model(x).shape
from pprint import pprint
class NestedModel(BaseModel):

    def __init__(self, c_in, hidden_dims):
        super().__init__(locals())
        self.fc = nn.Linear(c_in, hidden_dims[0])
        self.relu = nn.ReLU()
        linears = [nn.Linear(d0, d1) for d0, d1 in zip(hidden_dims,
                                                       hidden_dims[1:])]
        linears = list(zip(linears, [nn.LeakyReLU()
                                     for i in range(len(linears))]))
        self.seq = nn.Sequential(*[arg for pair in linears for arg in pair])
        self.mod = nn.ModuleList([nn.MaxPool1d(3),
                                  nn.Linear(hidden_dims[-1]//3, 3)])

    def forward(self, x):
        x = self.relu(self.fc(x))
        x = self.seq(x).unsqueeze(1)
        print(x)
        for layer in self.mod:
            x = layer(x)
            print(x)
            if 'Linear' in str(type(layer)):
                x = F.softmax(x, dim=-1)
        return x
x = torch.randint(10, (4, 6)).float()
print(x)
mod = NestedModel(6, [12, 10, 8, 6])
mod(x)
mod
# Gets list of all layers, extracting them from within modules and
# sequentials. Don't think we typically need to do this though.
layers = []
for c in mod.children():
    children = list(c.children())
    if not children:
        layers.append(c)
    else:
        layers.extend(children)
layers
for c in mod.children():
    print([(p.shape, p.requires_grad) for p in c.parameters()])
# Model.parameters() automatically iterates through each param in each child,
# same as in the cell above. recurse=False returns an empty list here because
# it only yields parameters registered directly on the module itself, and this
# model stores all of its parameters inside child modules.
[(p.shape, p.requires_grad) for p in mod.parameters(recurse=True)]
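Quick check of the recurse behavior described above: nothing is registered directly on mod, so recurse=False yields an empty list.
print(list(mod.parameters(recurse=False)))
print(len(list(mod.parameters(recurse=True))))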
pprint(list(mod.children()))
# Seems to start at high level and then dive deeper into model. So first item
# is whole model; then we get single layers, sequentials, and module lists;
# then linear layers in the sequentials and module lists.
# Not sure when this would be useful.
pprint(list(mod.modules()))
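named_modules() tends to be more practical than modules(): the dotted names make the nesting explicit, so it's easier to see which entries are containers and which are leaf layers.
pprint([name for name, _ in mod.named_modules()])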
# There's also named_parameters(), which is almost identical except each item
# is a (name, parameter) tuple rather than just the parameter tensor.
pprint(dict(enumerate(mod.parameters())))
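For comparison, the names produced by named_parameters() (names only, to keep the output short):
pprint([name for name, _ in mod.named_parameters()])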
mod
def freeze(x):
    for p in x.parameters():
        p.requires_grad = False

for p in mod.parameters():
    p.requires_grad = True
pprint(mod.trainable())
mod.apply(freeze)
print()
pprint(mod.trainable())
# apply() calls the function on every submodule recursively (and on the model
# itself), not just on the direct children.
mod.apply(lambda x: print('TYPE', type(x), list(x.parameters()), '\n'))
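A quick check that apply() really recurses: the number of modules visited matches modules(), not children(). `visited` is just a throwaway list for illustration.
visited = []
mod.apply(lambda m: visited.append(type(m).__name__))
print(len(visited), len(list(mod.children())), len(list(mod.modules())))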
class NestedSequentials(BaseModel):

    def __init__(self, x_dim, dims):
        super().__init__(locals())
        self.enc = nn.Sequential(nn.Linear(x_dim, dims[0]),
                                 nn.LeakyReLU(),
                                 nn.Linear(dims[0], dims[1]),
                                 nn.LeakyReLU())
        self.trans = nn.Sequential(nn.MaxPool1d(3),
                                   nn.ReLU(),
                                   nn.Linear(dims[1]//3, 4),
                                   nn.ReLU())
        self.dec = nn.Sequential(nn.Linear(4, 3),
                                 nn.Softmax(-1))
        self.blocks = nn.ModuleList([self.enc, self.trans, self.dec])

    def forward(self, x):
        for i, block in enumerate(self.blocks):
            if i == 1:
                x = x.unsqueeze(1)
            x = block(x)
        return x.squeeze(1)
bs = 4
x_dim = 5
dims = [8, 6]
x = torch.randint(6, size=(bs, x_dim), dtype=torch.float)
print(x)
nested = NestedSequentials(x_dim, dims)
nested
nested._init_variables
nested(x)
nested.weight_stats()
for p in nested.parameters():
    p.requires_grad = False
    print(p.shape, p.requires_grad)
    p.requires_grad = True
    print(p.shape, p.requires_grad, '\n')
print('-'*20)
nested.apply(freeze)
for p in nested.parameters():
    print(p.shape, p.requires_grad)
nested.save(11)
model = NestedSequentials.from_path('data/model_11.pth')
model(x)
model.weight_stats()
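The reloaded model should reproduce the original's outputs exactly (there's no dropout or batchnorm here, so train vs. eval mode makes no difference).
torch.allclose(model(x), nested(x))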
def variable_lr_optimizer(groups, lrs, optimizer=torch.optim.Adam, **kwargs):
    """Get an optimizer that uses different learning rates for different layer
    groups. Additional keyword arguments can be used to alter momentum and/or
    weight decay, for example, but for the sake of simplicity these values
    will be the same across layer groups.

    Parameters
    -----------
    groups: nn.ModuleList
        For this use case, the model should contain a ModuleList of layer
        groups in the form of Sequential objects. This variable is then passed
        in so each group can receive its own learning rate.
    lrs: list[float]
        A list containing the learning rates to use for each layer group. This
        should be the same length as the number of layer groups in the model.
        At times, we may want to use the same learning rate for all groups,
        and can achieve this by passing in a list containing a single float.
    optimizer: torch optimizer
        The Torch optimizer to be created (Adam by default).

    Examples
    ---------
    optim = variable_lr_optimizer(model.groups, [3e-3, 3e-2, 1e-1])
    """
    if len(lrs) == 1:
        lrs *= len(groups)
    data = [{'params': group.parameters(), 'lr': lr}
            for group, lr in zip(groups, lrs)]
    return optimizer(data, **kwargs)
variable_lr_optimizer(nested.blocks, [.5, .05, .005], weight_decay=.75)
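Each block ends up in its own param group with its own learning rate, while settings passed through **kwargs (weight_decay here) apply to every group.
optim = variable_lr_optimizer(nested.blocks, [.5, .05, .005], weight_decay=.75)
print([group['lr'] for group in optim.param_groups])
print([group['weight_decay'] for group in optim.param_groups])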
def save(self, epoch, dir_='data', file_pre='model', verbose=True, **kwargs):
    """Save model weights.

    Parameters
    -----------
    epoch: int
        The epoch of training the weights correspond to.
    dir_: str
        The directory which will contain the output file.
    file_pre: str
        The first part of the file name to save the weights to. The epoch
        and file extension will be added automatically.
    verbose: bool
        If True, print message to notify user that weights have been saved.
    **kwargs: any type
        User can optionally provide additional information to save
        (e.g. optimizer state dict).
    """
    os.makedirs(dir_, exist_ok=True)
    file = f'{file_pre}_{epoch}.pth'
    path = os.path.join(dir_, file)
    data = dict(weights=self.state_dict(),
                epoch=epoch,
                params=self._init_variables)
    data = {**data, **kwargs}
    torch.save(data, path)
    if verbose:
        print(f'Weights saved from epoch {epoch}.')
@classmethod
def from_path(cls, path, verbose=True):
    """Factory method to load a model from a file containing saved weights.

    Parameters
    -----------
    path: str
        File path to load weights from.
    verbose: bool
        If True, print message notifying user which weights have been loaded
        and what mode the model is in.
    """
    data = torch.load(path)
    model = cls(**data['params'])
    model.load_state_dict(data['weights'])
    model.eval()
    if verbose:
        print(f'Weights loaded from epoch {data["epoch"]}. '
              'Currently in eval mode.')
    return model
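One use for save()'s **kwargs: stashing the optimizer state next to the weights. A rough sketch, assuming save() is bound to the model as in the earlier nested.save(11) call; the 'optimizer' key is just whatever keyword name we pass in.
# Hypothetical round trip of optimizer state via the **kwargs mechanism above.
optim = torch.optim.Adam(nested.parameters(), lr=3e-3)
nested.save(12, optimizer=optim.state_dict())
# from_path() only rebuilds the model, so pull any extra data out manually.
data = torch.load('data/model_12.pth')
optim.load_state_dict(data['optimizer'])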
import matplotlib.pyplot as plt
from htools.ml import stats
def hook_plot_gradients(grads):
    fig, ax = plt.subplots()
    ax.hist(grads.flatten().numpy())
    plt.show()

def clip_gradients(grads):
    return torch.clamp(grads, -1000, 1000)

def penalty(y):
    """Arbitrarily chosen."""
    return y.sum(-1).var()
x2 = torch.randint(0, 10, (4, 8)).float()
x2
w = torch.randn((8, 2), requires_grad=True)
w.register_hook(hook_plot_gradients)
w
y = x2 @ w
cost = penalty(y)
cost
w.grad
cost.backward()
w.grad
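register_hook() returns a RemovableHandle, which is handy when a hook is only wanted temporarily. A hypothetical follow-up to the cell above:
# Keep the handle so the hook can be detached later.
handle = w.register_hook(lambda g: print('grad stats:', g.mean().item(), g.std().item()))
penalty(x2 @ w).backward()   # both the plotting hook and the new one fire
handle.remove()              # the temporary hook won't run on future backward passes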
def train(hooks, iters=5, lr=.1):
    x = torch.randint(6, (4, 128), dtype=torch.float)
    w = torch.randn((128, 2), requires_grad=True)
    # w.register_hook(hook_plot_gradients)
    # w.register_hook(lambda x: print(stats(x)))
    for hook in hooks:
        w.register_hook(hook)
    for i in range(iters):
        if w.grad is not None: w.grad.zero_()
        y = x @ w
        cost = penalty(y)
        cost.backward()
        with torch.no_grad():
            w -= lr * w.grad

hooks = [hook_plot_gradients,
         lambda x: print(stats(x)),
         clip_gradients,
         lambda x: print(torch.mean(x), torch.std(x))]
train(hooks, 10)
x.is_leaf
y.is_leaf
model