--- title: Title keywords: fastai sidebar: home_sidebar summary: "summary" ---
%load_ext autoreload
%autoreload 2
%matplotlib inline
import gc
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from sklearn.metrics import (accuracy_score, dcg_score, roc_auc_score,
precision_score, recall_score)
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from torch.optim import Adam
from htools import assert_raises, save, load
from incendio.core import *
from incendio.callbacks import *
from incendio.metrics import *
from incendio.optimizers import *
from incendio.utils import *
# Reproducible testing: fix both numpy and torch RNG streams so the synthetic
# datasets and model initializations below are repeatable across runs.
np.random.seed(0)
torch.manual_seed(0)
# Force cuDNN to pick deterministic algorithms (only matters on GPU runs).
torch.backends.cudnn.deterministic = True
class Data(Dataset):
    """Synthetic binary-classification dataset.

    Features are drawn uniformly from [0, 1); the target is a 75/25 weighted
    sum of the first two feature columns, rounded and clamped into {0, 1},
    with a trailing singleton dimension so it matches a one-logit model.
    """

    def __init__(self, n=64, dim=2):
        self.x = torch.rand(n, dim).float()
        weighted = self.x[:, 0] * .75 + self.x[:, 1] * .25
        binary = torch.clamp(weighted.round(), 0, 1).abs()
        self.y = binary.unsqueeze(-1)

    def __getitem__(self, i):
        return self.x[i], self.y[i]

    def __len__(self):
        return len(self.x)
class EasyDataset(Dataset):
    """Toy 1-D binary dataset where the label is a noisy threshold on x.

    Larger `scalar` shrinks the gaussian noise, making the classes easier
    to separate.
    """

    def __init__(self, n=10, scalar=8):
        """Larger scalar makes data easier to classify."""
        self.x = torch.rand(n).float().unsqueeze(-1)
        noisy = self.x + torch.randn_like(self.x) / scalar
        self.y = (noisy > 0.5).float()

    def __getitem__(self, i):
        return self.x[i], self.y[i]

    def __len__(self):
        return len(self.x)
class MulticlassData(Dataset):
    """Synthetic multiclass dataset with integer labels in [0, 4].

    A random integer scale in [0, 6) multiplies a weighted feature sum; the
    product is rounded and clamped so every label lands in {0, ..., 4}.
    """

    def __init__(self, n=64, dim=2):
        # NOTE: torch.rand must run before torch.randint to keep the RNG
        # stream (and therefore seeded results) identical to prior runs.
        self.x = torch.rand(n, dim).float()
        # Integer labels between 0 and 4, inclusive.
        scale = torch.randint(6, (n, 1)).float()
        weighted = (self.x[:, 0] * .75 + self.x[:, 1] * .25).unsqueeze(-1)
        raw = torch.round(scale * weighted)
        self.y = torch.clamp(raw, 0, 4).long().flatten()

    def __getitem__(self, i):
        return self.x[i], self.y[i]

    def __len__(self):
        return len(self.x)
# Small throwaway tensors for eyeballing metric inputs: 5 random
# two-column "predictions" and matching binary targets.
ypred_ = torch.rand(5, 2)
y_ = torch.tensor([0, 1, 1, 0, 1])
# Bare expression: displayed as notebook cell output.
ypred_, y_
class SimpleModel(BaseModel):
    """Minimal two-layer MLP that emits a single logit per example."""

    def __init__(self, dim):
        super().__init__()
        self.fc1 = nn.Linear(dim, 2)
        self.fc2 = nn.Linear(2, 1)

    def forward(self, x):
        hidden = F.leaky_relu(self.fc1(x))
        return self.fc2(hidden)
class GroupedModel(BaseModel):
    """MLP split into two parameter groups (body and head).

    The ModuleList grouping is what lets group-wise freeze/unfreeze logic
    address the layers as units.
    """

    def __init__(self, dim):
        super().__init__()
        body = nn.Sequential(
            nn.Linear(dim, 8),
            nn.LeakyReLU(),
            nn.Linear(8, 4),
            nn.LeakyReLU()
        )
        head = nn.Linear(4, 1)
        self.groups = nn.ModuleList([body, head])

    def forward(self, x):
        for block in self.groups:
            x = block(x)
        return x
DIM = 2
snet = SimpleModel(DIM)
snet
# Build an optimizer with a single learning rate for the whole model.
optim = variable_lr_optimizer(snet, 2e-3)
optim
# Passing multiple lrs here is expected to raise — the test asserts that.
with assert_raises(ValueError) as ar:
    optim = variable_lr_optimizer(snet, [3e-3, 1e-1])
optim
# Presumably updates lr/momentum in place — confirm against incendio docs.
update_optimizer(optim, 1e-3, 0.5)
optim
snet.freeze()
# Unfreeze progressively more layers and report which params are trainable.
for n in range(5):
    snet.unfreeze(n_layers=n)
    print(n, snet.trainable())
snet.freeze()
# SimpleModel has no `groups` attribute, so group-wise unfreezing should
# raise AttributeError.
with assert_raises(AttributeError) as ar:
    for n in range(3):
        snet.unfreeze(n_groups=n)
        print(n, snet.trainable())
DIM = 2
# Binary-classification metrics computed each epoch.
metrics = [accuracy_score,
           precision_score,
           recall_score,
           percent_positive,
           mean_soft_prediction
           ]
# Model starts out unfrozen and freezes last group starting with epoch 3.
# This is not useful but is done here for testing purposes.
callbacks = [EarlyStopper('accuracy', 'max', patience=3),
             PerformanceThreshold('recall', 'max', 0.25, skip_epochs=5),
             MetricHistory(),
             ModelUnfreezer({3: 1}, 'groups'),
             ModelCheckpoint(),
             CosineLRScheduler(),
             S3Uploader('gg-datascience', 'hmamin/incendio/v1')
             ]
train = Data(n=1_000, dim=DIM)
val = Data(n=30, dim=DIM)
dl_train = DataLoader(train, batch_size=8, shuffle=True)
dl_val = DataLoader(val, batch_size=8, shuffle=False)
gnet = GroupedModel(DIM)
# NOTE(review): output dir is '../data/v5' here but the load/ls cells below
# use '../data/v1' — confirm which directory is intended.
t = Trainer(gnet, train, val, dl_train, dl_val, F.binary_cross_entropy_with_logits,
            'binary', '../data/v5', optim_type=torch.optim.RMSprop,
            last_act=torch.sigmoid, metrics=metrics, callbacks=callbacks)
t
t.save('tmp.pkl')
# Notebook shell magic: list previously saved artifacts.
!ls ../data/v1
t.load(old_path='../data/v1/trainer.pkl')
d = load('../data/v1/trainer.pkl')
d['optim']
t.fit(5, 3e-1, 0.5, clean=True)
t.save('trainer.zip')
t2 = Trainer.from_file(os.path.join('..', 'data', 'v1', 'trainer.zip'))
print(t2)
# Free the reloaded trainer's memory.
del t2; gc.collect()
# NOTE(review): `time` is only imported further down in this notebook; this
# cell raises NameError when the file runs top to bottom before that import.
try:
    for i in range(10):
        time.sleep(1)
        print(i)
except KeyboardInterrupt:
    print('Interrupt')
class SimpleMulticlassModel(BaseModel):
    """Two-layer MLP that emits one logit per class."""

    def __init__(self, dim, classes):
        super().__init__()
        self.fc1 = nn.Linear(dim, 10)
        self.fc2 = nn.Linear(10, classes)

    def forward(self, x):
        hidden = F.leaky_relu(self.fc1(x))
        return self.fc2(hidden)
DIM = 2
metrics = [accuracy_score,
           mean_soft_prediction
           ]
callbacks = [
    # EarlyStopper('accuracy', 'max', patience=10),
    PerformanceThreshold('loss', 'min', 2, skip_epochs=5),
    MetricHistory(),
    ModelCheckpoint(),
    # NOTE(review): MetricHistory appears twice in this list — looks like
    # accidental duplication; confirm before trusting its recorded history.
    MetricHistory(),
    CosineLRScheduler(),
    S3Uploader('gg-datascience', 'hmamin/incendio/v2')
]
train = MulticlassData(n=88, dim=DIM)
val = MulticlassData(n=40, dim=DIM)
dl_train = DataLoader(train, batch_size=8, shuffle=True)
dl_val = DataLoader(val, batch_size=8, shuffle=False)
smnet = SimpleMulticlassModel(DIM, 5)
# NOTE(review): F.softmax without an explicit `dim` is deprecated in torch;
# a later cell passes partial(F.softmax, dim=-1) instead — confirm intent.
t = Trainer(smnet, train, val, dl_train, dl_val,
            F.cross_entropy, 'multiclass', '../data/v1',
            last_act=F.softmax, metrics=metrics, callbacks=callbacks)
t
t.fit(20, .3)
DIM = 2
metrics = [accuracy_score,
           mean_soft_prediction
           ]
# NOTE(review): the original comment here mentioned group freezing, but this
# callback list has no ModelUnfreezer — it appears copied from an earlier cell.
callbacks = [
    MetricHistory(),
    ModelCheckpoint(),
    # NOTE(review): duplicate MetricHistory — confirm intentional.
    MetricHistory(),
]
train = EasyDataset(400, 100)
val = EasyDataset(40, 100)
dl_train = DataLoader(train, batch_size=8, shuffle=True)
dl_val = DataLoader(val, batch_size=8, shuffle=False)
snet = SimpleModel(1)
# NOTE(review): mode is 'multiclass' but the loss/activation are binary
# (BCE-with-logits + sigmoid) — verify the intended mode string.
t = Trainer(snet, train, val, dl_train, dl_val,
            F.binary_cross_entropy_with_logits, 'multiclass', '../data/v1',
            last_act=torch.sigmoid, metrics=metrics, callbacks=callbacks)
t
t.fit(10, .1)
import inspect
# Check which Trainer constructor parameters the instance retained as attrs.
[getattr(t, k, None) for k in inspect.signature(Trainer).parameters]
from spellotape.dl import inverse_sigmoid
inverse_sigmoid(.625)
# Pull one training batch and push it through the multiclass model by hand.
*x, y = next(iter(dl_train))
x, y
yhat = smnet(*x)
yhat
y.shape, yhat.shape
F.softmax(yhat, dim=-1)
yhat.shape, y.shape
F.cross_entropy(yhat, y)
# NOTE(review): `partial` is not imported anywhere visible in this file;
# this cell raises NameError unless functools.partial was loaded elsewhere.
t = Trainer(smnet, train, val, dl_train, dl_val, F.cross_entropy, 'multiclass',
            '../data/v2', 'datascience-delphi-dev', last_act=partial(F.softmax, dim=-1),
            metrics=metrics, callbacks=callbacks)
t
import time
from functools import wraps
def catch_keyboard_interrupt(func):
    """Decorator for instance methods: swallow KeyboardInterrupt and flag
    the owning class by setting a `stop` class attribute.

    Parameters
    ----------
    func: callable
        A method-style function whose first positional argument is `self`.

    Returns
    -------
    callable: Wrapped function that returns None and never propagates
        KeyboardInterrupt.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            func(*args, **kwargs)
        except KeyboardInterrupt:
            print('interrupt', dir(func))
            # This sets a class variable, not an instance var (matching the
            # original behavior). Using type(args[0]) replaces the old
            # hard-coded `eval(Foo.bar.__qualname__...)`, which only worked
            # when the decorated method belonged to a global class named Foo.
            setattr(type(args[0]), 'stop', True)
            return
    return wrapper
def catch_method_interrupt(meth_name):
    """Class decorator factory: wrap method `meth_name` so KeyboardInterrupt
    sets a `stop` class attribute instead of propagating.

    Parameters
    ----------
    meth_name: str
        Name of the method on the decorated class to wrap.

    Returns
    -------
    callable: A class decorator that returns the same class with the named
        method replaced by the interrupt-catching wrapper.
    """
    def decorator(cls):
        func = getattr(cls, meth_name)
        @wraps(func)
        def wrapper(*args, **kwargs):
            try:
                func(*args, **kwargs)
            except KeyboardInterrupt:
                # Class variable, not an instance var: flags all instances.
                setattr(cls, 'stop', True)
        # BUG FIX: the original returned `wrapper` from `decorator`, which
        # replaced the decorated CLASS with a bare function (so Fizz(6)
        # called the wrapper and instantiation broke). Patch the wrapped
        # method onto the class and return the class itself.
        setattr(cls, meth_name, wrapper)
        return cls
    return decorator
class Foo:
    """Toy class for exercising the interrupt-catching method decorator."""

    def __init__(self, a):
        self.a = a

    @catch_keyboard_interrupt
    def bar(self, b=3):
        # Print `a` once per second, leaving time to interrupt manually.
        for _ in range(b):
            time.sleep(1)
            print(self.a)
@catch_method_interrupt('train')
class Fizz:
    """Toy class for exercising the interrupt-catching class decorator."""

    def __init__(self, a):
        self.a = a

    def walk(self, t=5):
        # Count upward once per second; this method is NOT wrapped.
        for step in range(t):
            time.sleep(1)
            print(step)

    def train(self, epochs):
        # NOTE: iterates over `epochs`, so callers must pass an iterable.
        for epoch in epochs:
            print(epoch)
            time.sleep(1)
f = Foo(5)
f.__dict__
# bar() sleeps ~3s total; interrupt it manually to trigger the handler.
f.bar()
f.__dict__
# NOTE(review): `stop` only exists if bar() was actually interrupted —
# otherwise both lookups below raise AttributeError.
Foo.stop
f.stop
# NOTE(review): verify Fizz is still a class after decoration — the
# decorator must return cls for this instantiation to work.
fizz = Fizz(6)
fizz.walk()