Source code for trident.optims.pytorch_trainer
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import inspect
import os
import random
import shutil
import string
import sys
import time
import uuid
from functools import partial
from typing import List, Tuple
import numpy as np
import torch
import torch.nn as nn
from trident import __version__
from trident.optims.pytorch_constraints import get_constraint
from trident.optims.pytorch_losses import get_loss
from trident.optims.pytorch_metrics import get_metric
from trident.optims.pytorch_optimizers import get_optimizer
from trident.optims.pytorch_regularizers import get_reg
from trident.optims.trainers import ModelBase, progress_bar
from trident.backend.common import *
from trident.backend.pytorch_backend import *
from trident.layers.pytorch_layers import *
from trident.backend.pytorch_ops import *
from trident.callbacks.lr_schedulers import get_lr_scheduler
from trident.data.image_common import *
from trident.misc.visualization_utils import tile_rgb_images, loss_metric_curve
from trident.backend.optimizer import OptimizerBase
__all__ = ['TrainingItem', 'Model', 'ImageClassificationModel', 'ImageDetectionModel', 'ImageGenerationModel',
'ImageSegmentationModel','FaceRecognitionModel']
_, term_width = get_terminal_size()
term_width = int(term_width)
TOTAL_BAR_LENGTH = 65.
last_time = time.time()
begin_time = last_time
def _to_tuple(x):
if isinstance(x, tuple):
return x
elif isinstance(x, list):
return tuple(x)
else:
return x,
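# Behaviour sketch (illustrative): _to_tuple normalizes its argument into a tuple:
#   _to_tuple((1, 2)) -> (1, 2)
#   _to_tuple([1, 2]) -> (1, 2)
#   _to_tuple(3)      -> (3,)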
def make_deterministic(seed: int = 19260817, cudnn_deterministic: bool = False):
r"""Make experiment deterministic by using specific random seeds across
all frameworks and (optionally) use deterministic algorithms.
Args:
seed (int): The random seed to set.
cudnn_deterministic (bool): If `True`, set CuDNN to use
deterministic algorithms. Setting this to `True` can negatively
impact performance, and might not be necessary for most cases.
Defaults to `False`.
"""
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
if cudnn_deterministic:
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
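# Minimal usage sketch (illustrative, not part of the original module): call
# make_deterministic before building the model so weight initialization and
# data shuffling become repeatable; cudnn_deterministic=True trades speed for
# bit-exact reproducibility on CUDA.
#
#   from trident.optims.pytorch_trainer import make_deterministic
#   make_deterministic(seed=42, cudnn_deterministic=True)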
class Model(ModelBase):
def __init__(self, inputs=None, output=None, input_shape=None):
super(Model, self).__init__(inputs, output, input_shape)
def _initial_graph(self, inputs=None, output=None, input_shape=None,initializer=None):
if output is None:
raise ValueError('At least one output is required.')
if isinstance(output,(np.ndarray,torch.Tensor)) and input_shape is None:
input_shape=output.shape
if inputs is None:
if input_shape is None:
raise ValueError('Either inputs or input_shape must be provided.')
else:
input_shape = to_list(input_shape)
input_name = 'input_{0}'.format(len(self.inputs))
self.inputs[input_name] = input_shape
elif isinstance(inputs, (tuple, list)):
for m in range(len(inputs)):
inp=inputs[m]
if is_tensor(inp):
input_name = 'input_{0}'.format(m)
self.inputs[input_name] = list(int_shape(inp))[1:]
elif isinstance(inputs, dict):
for k, v in inputs.items():
if is_tensor(v):
self.inputs[k] = list(int_shape(v))[1:]
elif is_tensor(inputs):
self.inputs['input'] = list(int_shape(inputs))[1:]
#single model
if isinstance(output, (Layer, nn.Module)):
#update notes
output.nodes = OrderedDict([(mod.uuid, mod) for mod in list(output.modules())if isinstance(mod,Layer)])
for mod in output.modules():
if isinstance(mod, Layer):
mod.nodes = output.nodes
# output.cpu()
if output.built and hasattr(output,'_output_shape') and is_tensor(output._output_shape):
self._model = output
self._outputs['output'] = to_list(output._output_shape)
self._targets['target'] = to_list(output._output_shape)
else:
output.input_shape = input_shape
dummy_input = to_tensor(np.random.standard_normal((1,)+tuple(input_shape)).astype(np.float32)).to(get_device())
output.to(get_device())
output.eval()
out = output(dummy_input)
self._model = output
if isinstance(out, torch.Tensor):
self._outputs['output'] = to_list(out.size())[1:]
self._targets['target'] = to_list(out.size())[1:]
else:
for i in range(len(out)):
self._outputs['output_{0}'.format(i)] = to_list(out[i].size())[1:]
self._targets['target_{0}'.format(i)] = to_list(out[i].size())[1:]
self.signature = get_signature(self._model.forward)
elif isinstance(output, (list, tuple)):
output_list = []
model_list = []
dummy_input = to_tensor(np.random.standard_normal((1,) + tuple(input_shape)).astype(np.float32)).to(get_device()) if not is_tensor(inputs) else inputs.to(get_device())
for op in output:
if isinstance(op, (Layer, nn.Module)):
out = op(dummy_input)
model_list.append(op)
if isinstance(out, (list, tuple)):
output_list.extend(out)
else:
output_list.append(out)
model = Combine(model_list)
self._model = model
self.name = model.name
for i in range(len(output_list)):
self._outputs['output_{0}'.format(i)] = list(int_shape(output_list[i]))[1:]
self._targets['target_{0}'.format(i)] = list(int_shape(output_list[i]))[1:]
self.signature = get_signature(self._model.forward)
elif isinstance(output,(np.ndarray,torch.Tensor)):
#style transfer , or adversarial attack
self._model =to_tensor(output,requires_grad=True)
self._outputs['output'] = list(int_shape(self._model))[1:]
self._targets['target'] = list(int_shape(self._model))[1:]
self.signature = OrderedDict()
self.signature['x']=list(int_shape(self._model))[1:]
else:
raise ValueError('Invalid output')
self.training_context['current_model'] = self._model
save_path = os.path.join('Models', '{0}.pth.tar_'.format(self._model.name))
self.save_path =sanitize_path(make_dir_if_need(save_path))
self.training_context['save_path'] = self.save_path
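# Summary of the branches above: _initial_graph accepts three kinds of `output`:
#   1. a single Layer/nn.Module              -> used directly as self._model
#   2. a list/tuple of Layer/nn.Module items -> merged into one model via Combine(...)
#   3. an np.ndarray/torch.Tensor            -> wrapped as a trainable tensor
#      (style transfer or adversarial-attack scenarios)
# Minimal construction sketch (illustrative; `net` is a hypothetical built Layer):
#   model = Model(input_shape=(3, 224, 224), output=net)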
@property
def outputs(self):
return self._outputs
@property
def device(self):
return get_device()
def train(self):
if self._model is not None and isinstance(self._model, torch.Tensor):
pass
elif self._model is not None and isinstance(self._model,Layer) and self._model.built:
self._model.train()
else:
raise ValueError('There is no built model, nothing to train.')
def eval(self):
if self._model is not None and isinstance(self._model, torch.Tensor):
pass
elif self._model is not None and isinstance(self._model, Layer) and self._model.built:
self._model.eval()
else:
raise ValueError('There is no built model, nothing to evaluate.')
@property
def layers(self):
if self._model is not None and isinstance(self._model, torch.Tensor):
return None
else:
return self._model._nodes
def complie(self, optimizer, losses=None, metrics=None, loss_weights=None, sample_weight_mode=None,
weighted_metrics=None, target_tensors=None):
self.with_optimizer(optimizer)
if losses is not None and isinstance(losses, (list, tuple)):
for loss in losses:
self.with_loss(loss)
if metrics is not None and isinstance(metrics, (list, tuple)):
for metric in metrics:
self.with_metric(metric)
return self
def with_optimizer(self, optimizer, **kwargs):
if isinstance(optimizer, str):
optimizer_class = get_optimizer(optimizer)
self.optimizer = optimizer_class(self._model.parameters() if isinstance(self._model, nn.Module) else [self._model], **kwargs)
else:
self.optimizer = optimizer(self._model.parameters() if isinstance(self._model, nn.Module) else [self._model], **kwargs)
# self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, verbose=True, mode='min',
# factor=0.5, patience=5, threshold=1e-4,
# cooldown=0, min_lr=1e-10, eps=1e-8)
self.base_lr = kwargs.get('lr', kwargs.get('learning_rate', 1e-3))
self.training_context['optimizer'] = self.optimizer
self.training_context['base_lr'] = self.base_lr
self.training_context['current_lr'] = self.base_lr
return self
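# Usage sketch (illustrative): the optimizer can be given by name (resolved via
# get_optimizer) or as a class; extra keyword arguments are forwarded to the
# optimizer constructor and 'lr' (or 'learning_rate') becomes base_lr.
#   model.with_optimizer('Adam', lr=1e-3, betas=(0.9, 0.999))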
def with_loss(self, loss, loss_weight=1, output_idx=0, start_epoch=0, name='', **kwargs):
alias = name
argnames = OrderedDict()
if (alias is None or len(alias) == 0) and hasattr(loss, '__name__'):
alias = loss.__name__
if isinstance(loss, str):
loss_class = get_loss(loss)
alias = loss if loss_class is not None else alias
if alias in self._losses:
dup_keys=[key for key in self._losses.key_list if alias+'_' in key]
alias = alias + '_' + str(len(dup_keys)+1)
self._losses[alias] = loss_class(**kwargs)
if hasattr(loss, 'forward'):
argnames = get_signature(self._losses[alias].forward)
else:
argnames = get_signature(self._losses[alias].__call__)
elif inspect.isclass(loss) and inspect._is_type(loss):
alias = loss.__class__.__name__ if alias is None or len(alias) == 0 else alias
if alias in self._losses:
dup_keys = [key for key in self._losses.key_list if alias + '_' in key]
alias = alias + '_' + str(len(dup_keys)+1)
self._losses[alias] = loss(**kwargs)
if hasattr(loss, 'forward'):
argnames = get_signature(self._losses[alias].forward)
else:
argnames = get_signature(self._losses[alias].__call__)
elif not inspect.isfunction(loss) and callable(loss):
alias = loss.__class__.__name__ if len(alias) == 0 else alias
if alias in self._losses:
dup_keys = [key for key in self._losses.key_list if alias + '_' in key]
alias = alias + '_' + str(len(dup_keys) + 1)
self._losses[alias] = loss
if hasattr(loss, 'forward'):
argnames = get_signature(self._losses[alias].forward)
else:
argnames = get_signature(self._losses[alias].__call__)
elif inspect.isfunction(loss):
if alias in self._losses:
dup_keys = [key for key in self._losses.key_list if alias + '_' in key]
alias = alias + '_' + str(len(dup_keys) + 1)
spec = inspect.getfullargspec(loss)
if len(spec.args) >= 2 and len(spec.args) - (0 if spec.defaults is None else len(spec.defaults)) == 2 and (
spec.args[0] in ['output', 'y_pred', 'pred'] or 'target_' + spec.args[0] == spec.args[1]):
self._losses[alias] = loss
else:
self._losses[alias] = partial(loss, **kwargs)
argnames = get_signature(self._losses[alias], skip_default=True)
self.loss_weights[alias] = loss_weight
outputs = self.outputs
targets = self.targets
if all([k in targets.key_list or k in outputs.key_list for k in argnames.key_list]):
pass
elif outputs is not None and len(outputs) == 1 and len(argnames) == 2 and argnames.key_list[0] in ['input','output','y_pred'] and argnames.key_list[1] in ['target','label','y_true']:
argnames = OrderedDict()
argnames[outputs.key_list[0]] = outputs[outputs.key_list[0]]
argnames[targets.key_list[0]] = targets[targets.key_list[0]]
elif outputs is not None and len(outputs) == 1 and len(argnames) == 2 :
argnames[argnames.key_list[0]] = outputs[outputs.key_list[0]]
argnames[argnames.key_list[1]] = targets[targets.key_list[0]]
elif outputs is not None and len(outputs) > 1:
output_idx = list(output_idx) if isinstance(output_idx, (list, tuple)) else [output_idx]
if len(output_idx) == 1 and len(argnames) == 2:
argnames = OrderedDict()
out = outputs.key_list[output_idx[0]]
target = targets.key_list[output_idx[0]]
argnames[argnames.key_list[0]] = outputs[out]
argnames[argnames.key_list[1]] = targets[target]
elif len(output_idx) > 1 and len(argnames) == 2 * len(output_idx):
for idx in output_idx:
out = outputs.key_list[idx]
target = targets.key_list[idx]
if out in argnames:
argnames[out] = outputs[out]
if target in argnames:
argnames[target] = targets[target]
self._losses[alias].__name__ = alias
self._losses[alias].signature = argnames
self._losses[alias].start_epoch = start_epoch
print('{0} signature:{1}'.format(alias, self._losses[alias].signature.item_list))
return self
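# Usage sketch (illustrative): losses may be registered by name (resolved via
# get_loss), by class, by callable instance, or as a plain function whose
# argument names are matched against the model outputs/targets; loss_weight
# scales the contribution and start_epoch delays it.
#   model.with_loss('CrossEntropyLoss', loss_weight=1.0)
#   model.with_loss(lambda output, target: ((output - target) ** 2).mean(),
#                   loss_weight=0.5, start_epoch=5, name='mse_aux')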
def with_metric(self, metric, output_idx=0, collect_history=None, name='', **kwargs):
if collect_history is None:
collect_history=True
alias = name
argnames = OrderedDict()
if (alias is None or len(alias) == 0) and hasattr(metric, '__name__'):
alias = metric.__name__
if isinstance(metric, str):
alias = metric if len(alias) == 0 else alias
metric_class = get_metric(metric)
if alias in self._metrics:
dup_keys = [key for key in self._metrics.key_list if alias + '_' in key]
alias = alias + '_' + str(len(dup_keys) + 1)
self._metrics[alias] = metric_class(**kwargs)
if hasattr(metric, 'forward'):
argnames = get_signature(self._metrics[alias].forward)
else:
argnames = get_signature(self._metrics[alias].__call__)
elif inspect.isclass(metric) and inspect._is_type(metric):
alias = metric.__class__.__name__ if len(alias) == 0 else alias
if alias in self._metrics:
dup_keys = [key for key in self._metrics.key_list if alias + '_' in key]
alias = alias + '_' + str(len(dup_keys) + 1)
self._metrics[alias] = metric(**kwargs)
if hasattr(metric, 'forward'):
argnames = get_signature(self._metrics[alias].forward)
else:
argnames = get_signature(self._metrics[alias].__call__)
elif not inspect.isfunction(metric) and callable(metric):
alias = metric.__class__.__name__ if len(alias) == 0 else alias
if alias in self._metrics:
dup_keys = [key for key in self._metrics.key_list if alias + '_' in key]
alias = alias + '_' + str(len(dup_keys) + 1)
self._metrics[alias] = metric
if hasattr(metric, 'forward'):
argnames = get_signature(self._metrics[alias].forward)
else:
argnames = get_signature(self._metrics[alias].__call__)
elif inspect.isfunction(metric):
if alias in self._metrics:
dup_keys = [key for key in self._metrics.key_list if alias + '_' in key]
alias = alias + '_' + str(len(dup_keys) + 1)
spec = inspect.getfullargspec(metric)
if len(spec.args) >= 2 and len(spec.args) - (0 if spec.defaults is None else len(spec.defaults)) == 2 and (
spec.args[0] in ['output', 'y_pred', 'pred'] or 'target_' + spec.args[0] == spec.args[1]):
self._metrics[alias] = metric
else:
self._metrics[alias] = partial(metric, **kwargs)
argnames = get_signature(self._metrics[alias], skip_default=True)
outputs = self.outputs
targets = self.targets
if all([k in targets.key_list or k in outputs.key_list for k in argnames.key_list]):
pass
elif outputs is not None and len(outputs) == 1 and len(argnames) == 2 and argnames.key_list[0] in ['input', 'output', 'y_pred'] and argnames.key_list[1] in ['target', 'label', 'y_true']:
argnames = OrderedDict()
argnames[outputs.key_list[0]] = outputs[outputs.key_list[0]]
argnames[targets.key_list[0]] = targets[targets.key_list[0]]
elif outputs is not None and len(outputs) == 1 and len(argnames) == 2:
argnames[argnames.key_list[0]] = outputs[outputs.key_list[0]]
argnames[argnames.key_list[1]] = targets[targets.key_list[0]]
elif outputs is not None and len(outputs) > 1:
output_idx = list(output_idx) if isinstance(output_idx, (list, tuple)) else [output_idx]
if len(output_idx) == 1 and len(argnames) == 2:
argnames = OrderedDict()
out = outputs.key_list[output_idx[0]]
target = targets.key_list[output_idx[0]]
argnames[argnames.key_list[0]] = outputs[out]
argnames[argnames.key_list[1]] = targets[target]
elif len(output_idx) > 1 and len(argnames) == 2 * len(output_idx):
for idx in output_idx:
out = outputs.key_list[idx]
target = targets.key_list[idx]
if out in argnames:
argnames[out] = outputs[out]
if target in argnames:
argnames[target] = targets[target]
self._metrics[alias].__name__ = alias
self._metrics[alias].signature = argnames
self._metrics[alias].collect_history=collect_history
print('{0} signature:{1}'.format(alias, self._metrics[alias].signature.item_list))
return self
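# Usage sketch (illustrative, assuming an 'accuracy' metric is registered in
# trident.optims.pytorch_metrics): metrics follow the same resolution rules as
# losses, and collect_history=False keeps only the latest value instead of the
# full history.
#   model.with_metric('accuracy')
#   model.with_metric('accuracy', collect_history=False, name='accuracy_latest')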
def with_regularizer(self, reg, **kwargs):
if reg is None:
return self
reg_fn = None
if isinstance(reg, str):
reg_fn = get_reg(reg)
elif callable(reg):
reg_fn = reg
args = get_signature(reg_fn)
if 'reg_weight' in args:
args.pop('reg_weight')
self._regs[reg_fn.__name__] = partial(reg_fn, **kwargs)
self._regs[reg_fn.__name__].signature = args
return self
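# Usage sketch (illustrative, assuming an 'l2' regularizer is registered in
# trident.optims.pytorch_regularizers); keyword arguments such as reg_weight
# are bound via functools.partial.
#   model.with_regularizer('l2', reg_weight=1e-4)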
def with_constraint(self, constraint, **kwargs):
if constraint is None:
return self
constraint_fn = None
if isinstance(constraint, str):
constraint_fn = get_constraint(constraint)
if hasattr(constraint_fn, 'forward') and constraint_fn.__name__[-4:] == 'norm':
self._constraints[constraint_fn.__name__] = constraint_fn(**kwargs)
elif callable(constraint_fn) and constraint_fn.__name__[-4:] == 'norm':
self._constraints[constraint_fn.__name__] = partial(constraint_fn, **kwargs)
return self
def with_model_save_path(self, save_path, **kwargs):
if save_path is None or len(save_path)==0:
save_path=os.path.join('Models','{0}.pth.tar_'.format(self.name))
self.save_path = make_dir_if_need(save_path)
self.training_context['save_path'] = self.save_path
return self
def with_learning_rate_scheduler(self, lr_schedule, warmup=0, **kwargs):
if lr_schedule is None:
return self
if isinstance(lr_schedule, str):
lr_schedule = get_lr_scheduler(lr_schedule)
if callable(lr_schedule):
lr_scheduler = lr_schedule(**kwargs)
self.callbacks.append(lr_scheduler)
self.warmup = warmup
if self.warmup > 0:
self.optimizer.adjust_learning_rate(1e-6, False)
self.training_context['current_lr'] = 1e-6
return self
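# Usage sketch (illustrative, assuming a 'reduce_lr_on_plateau' scheduler is
# registered in trident.callbacks.lr_schedulers): warmup > 0 starts training at
# lr=1e-6 and ramps up toward base_lr during the warm-up epochs.
#   model.with_learning_rate_scheduler('reduce_lr_on_plateau', warmup=5)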
def adjust_learning_rate(self, lr):
self.optimizer.param_groups[0]['lr'] = lr
self.training_context['current_lr'] = lr
def rebinding_input_output(self, input_shape):
if self._model is not None and isinstance(self._model, torch.Tensor):
pass
elif input_shape == self._model._input_shape:
pass
else:
if len(self.inputs) == 1:
input_shape_list = to_numpy(input_shape).tolist()
self._model.input_shape = input_shape_list
self.inputs[self.inputs.key_list[0]] = input_shape_list
dummy_input = to_tensor(np.random.standard_normal((2, *input_shape_list))).to(
torch.device("cuda" if self._model.weights[0].data.is_cuda else "cpu"))
out = self._model(dummy_input)
if isinstance(out, torch.Tensor) and len(self.targets) == 1:
self.targets[self.targets.key_list[0]] = list(out.size())[1:] if len(out.size()) > 1 else []
self._model.output_shape = out.size()[1:]
elif isinstance(out, tuple):
for k in range(len(out)):
self.targets[self.targets.key_list[k]] = list(out[k].size())[1:] if len(out[k].size()) > 1 else []
def do_on_epoch_start(self):
# if self.training_context['current_epoch'] < self.warmup:
# lr = 1e-5 * (self.training_context['current_epoch'] + 1)
# self.optimizer.param_groups[0]['lr'] = lr
# self.training_context['current_lr'] = lr
# elif self.training_context['current_epoch'] == self.warmup:
# self.optimizer.param_groups[0]['lr'] = self.base_lr
# self.training_context['current_lr'] =self.base_lr
if self.model.device == 'cuda':
torch.cuda.empty_cache()
def do_on_epoch_end(self):
pass
# if self.training_context['current_epoch'] > self.warmup:
#     if self.lr_scheduler is not None:
#         self.lr_scheduler.step(np.array(self.training_context['metrics'][list(self._metrics.keys())[0]]).mean())
#         self.training_context['current_lr'] = self.optimizer.lr
#     if self.optimizer.param_groups[0]['lr'] < 1e-8:
#         self.optimizer.param_groups[0]['lr'] = 0.05 * self.base_lr
#         self.training_context['current_lr'] = 0.05 * self.base_lr
# elif self.warmup > 0 and self.training_context['current_epoch'] == self.warmup:
#     self.optimizer.adjust_learning_rate(self.base_lr, True)
#     self.training_context['current_lr'] = self.base_lr
# elif self.warmup > 0 and self.training_context['current_epoch'] < self.warmup:
#     self.optimizer.adjust_learning_rate(1e-5 * (self.training_context['current_epoch'] + 1), True)
#     self.training_context['current_lr'] = 1e-5 * (self.training_context['current_epoch'] + 1)
def do_on_batch_end(self):
if self.training_context['current_batch'] == 0:
temp = OrderedDict()
for k in self.training_context['losses'].key_list:
if len(self.training_context['losses'][k])>0:
temp[k] = self.training_context['losses'][k][-1]
print(temp)
def do_on_data_received(self, train_data, test_data):
# fields=train_data._fields
# for i in range(len(fields)):
if 'data_feed' not in self.training_context or len(self.training_context['data_feed']) == 0:
try:
data_feed = OrderedDict()
inshapes = self.inputs.value_list
outshapes = self.targets.value_list
available_fields = copy.deepcopy(train_data.key_list)
if train_data is not None:
# check input
for arg in self.signature.key_list:
data_feed[arg] = ''
for arg in self.signature.key_list:
if len(train_data) == 1:
data_feed[arg] = train_data.key_list[0]
available_fields.remove(train_data.key_list[0])
elif arg in available_fields:
data_feed[arg] = arg
available_fields.remove(arg)
elif arg in ['x', 'input'] and 'data' in available_fields:
data_feed[arg] = 'data'
available_fields.remove('data')
elif arg in ['x', 'input'] and 'image' in available_fields:
data_feed[arg] = 'image'
available_fields.remove('image')
elif arg == 'x' and 'input' in available_fields:
data_feed[arg] = 'input'
available_fields.remove('input')
elif len(self.signature.key_list) == 1:
for item in available_fields:
data_shape = list(train_data[item].shape[1:]) if len(train_data[item].shape) > 1 else []
if 'target' not in item and 'output' != item and data_shape == inshapes[0]:
data_feed[arg] = item
available_fields.remove(item)
break
else:
Warning('input argument {0} cannot be mapped to any data, please check it and update the data_feed'.format(arg))
if len(self.signature.key_list) == 1 and data_feed[self.signature.key_list[0]] != None:
self.training_context['data_feed'] = data_feed
# check for target
for i in range(len(self.targets)):
arg = self.targets.key_list[i]
data_feed[arg] = ''
if len(train_data) == 1:
data_feed[self.targets.key_list[0]] = train_data.key_list[0]
elif arg in available_fields:
data_feed[arg] = arg
available_fields.remove(arg)
elif arg == 'target' and 'label' in available_fields:
data_feed[arg] = 'label'
available_fields.remove('label')
elif arg == 'target' and len(available_fields) == 1:
data_feed[arg] = available_fields[0]
available_fields.remove(available_fields[0])
elif len(available_fields) > 0:
target_shape = outshapes[i]
for item in available_fields:
data_shape = list(train_data[item].shape[1:]) if len(train_data[item].shape) > 1 else []
if target_shape == data_shape:
data_feed[arg] = item
available_fields.remove(item)
elif ('int64' in str(train_data[item].dtype) or 'int32' in str(
train_data[item].dtype)) and target_shape[:-1] == data_shape:
data_feed[arg] = item
available_fields.remove(item)
else:
Warning('target argument {0} cannot be mapped to any data, please check it and update the data_feed'.format(arg))
if len(self.targets) == 1 and data_feed[self.targets.key_list[0]] != None:
self.training_context['current_target'] = train_data[data_feed[self.targets.key_list[0]]]
self.training_context['data_feed'] = data_feed
print('data_feed', data_feed)
except:
PrintException()
# convert to tensor
try:
data_feed = self.training_context['data_feed']
input_list = [data_feed[arg] for arg in self.signature.key_list]
for item in train_data.key_list:
if item in input_list:
# only the model's input arguments
train_data[item] = to_tensor(train_data[item].copy(), requires_grad=True)
elif item in self.targets.key_list or data_feed:
train_data[item] = to_tensor(train_data[item].copy())
else:
train_data[item] = to_tensor(train_data[item].copy())
if test_data is not None and item in test_data:
test_data[item] = to_tensor(test_data[item].copy())
# check target
self.training_context['train_data'] = train_data
self.training_context['test_data'] = test_data
except:
PrintException()
return train_data, test_data
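# Data-feed resolution sketch (illustrative): on the first batch this method
# builds a mapping from the model/target argument names to the fields present
# in the incoming batch; e.g. a batch with fields ('image', 'label') and a
# model signature ('x',) plus one 'target' typically resolves to
#   data_feed = {'x': 'image', 'target': 'label'}
# The mapping is cached in training_context['data_feed'] and reused for all
# later batches.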
def do_gradient_update(self, log_gradients=False):
if self.training_context['stop_update'] <1:
self.training_context['current_loss'].backward(retain_graph=self.training_context['retain_graph'])
#only check once every epoch start.
if self.training_context['current_batch']==0:
if isinstance(self._model,nn.Module):
for name,para in self._model.named_parameters():
try:
if para is not None and para.grad is not None:
grad_norm=para.grad.norm()
if not 0 < grad_norm < 1e5:
sys.stderr.write('warning: gradient norm {0} exceeds 1e5 or is not positive\n'.format(grad_norm))
except:
PrintException()
elif isinstance(self._model,torch.Tensor):
grad_norm = self._model.grad.norm()
if not 0 < grad_norm < 1e5:
sys.stderr.write('warning: gradient norm {0} exceeds 1e5 or is not positive\n'.format(grad_norm))
if any_abnormal_number(grad_norm):
raise ValueError('grad_norm cannot contain abnormal numbers (nan or inf).')
for callback in self.training_context['callbacks']:
callback.on_optimization_step_start(self.training_context)
if log_gradients:
self.log_gradient()
if self.training_context['stop_update'] == 0:
self.optimizer.step(self.get_current_loss)
elif 0 < self.training_context['stop_update'] < 1:
if random.random() <= self.training_context['stop_update']:
self.optimizer.step(self.get_current_loss)
else:
self.training_context['stop_update'] = self.training_context['stop_update'] - 1
for callback in self.training_context['callbacks']:
callback.on_optimization_step_end(self.training_context)
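# stop_update semantics (summary of the branches above):
#   stop_update == 0     -> back-propagate and step the optimizer on every batch
#   0 < stop_update < 1  -> back-propagate, but step only with that probability
#   stop_update >= 1     -> skip backward and the step, and decrement the counter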
def do_on_progress_end(self):
if self.training_context['current_epoch'] > self.warmup:
if self.lr_scheduler is not None:
self.lr_scheduler.step(np.array(self.training_context['metrics'][list(self._metrics.keys())[0]]).mean())
self.training_context['current_lr'] = self.optimizer.lr
def log_gradient(self, grads=None):
grad_dict =OrderedDict()
if isinstance(self._model, nn.Module):
for i, (k, v) in enumerate(self._model.named_parameters()):
grad_dict[k] = to_numpy(v.grad)
self.gradients_history.append(grad_dict)
def log_weight(self, weights=None):
weight_dict =OrderedDict()
if isinstance(self._model, nn.Module):
for k, v in self._model.named_parameters():
weight_dict[k] = to_numpy(v.data)
self.weights_history.append(weight_dict)
def save_model(self, save_path=None):
for callback in self.training_context['callbacks']:
callback.on_model_saving_start(self.training_context)
if any_abnormal_number(self._model):
raise ValueError(self._get_name() + ' nan detected!!')
if isinstance(self._model,nn.Module):
save_path=self.get_save_path(save_path,default_folder='Models',default_file_name= '{0}_epoch{1}.pth.tar_'.format(self._model.name,self.training_context['current_epoch']))
self._model.eval()
torch.save({
'state_dict': self._model.state_dict(),
'backend':'pytorch',
'trident_version':__version__,
'pytorch_version':torch.__version__,
'signature':self.signature
}, save_path)
self._model.train()
shutil.copy(save_path, save_path.replace('.pth.tar_','.pth.tar'))
elif isinstance(self._model,torch.Tensor):
save_path = self.get_save_path(save_path, default_folder='Models',default_file_name='{0}_epoch{1}.npy_'.format(self._model.name, self.training_context[ 'current_epoch']))
numpy_model=to_numpy(self._model)
np.save( sanitize_path(save_path),numpy_model)
shutil.copy(save_path, save_path.replace('.npy_', '.npy'))
sys.stderr.write('Your model is a Tensor, not an nn.Module; it has been saved as a numpy array (*.npy) successfully.\n')
else:
raise ValueError('Only an nn.Module or a Tensor model can be saved; your model is {0}'.format(type(self._model)))
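# Usage sketch (illustrative): save_model writes to a temporary '*.pth.tar_'
# (or '*.npy_') file first and then copies it to the final name, so an
# interrupted save does not corrupt the previous checkpoint.
#   model.save_model()                              # default path under 'Models/'
#   model.save_model('Models/my_model.pth.tar_')    # hypothetical explicit path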
def save_onnx(self, file_path, dynamic_axes=None):
if isinstance(self._model,nn.Module):
save_path = self.get_save_path(file_path, default_folder='Models',default_file_name='{0}_epoch{1}.onnx_'.format(self._model.name, self.training_context[ 'current_epoch']))
import torch.onnx
self._model.eval()
dummy_input = torch.randn(1, *to_list(self._model.input_shape), device=get_device(),requires_grad=True)
file, ext = os.path.splitext(file_path)
if ext is None or ext != '.onnx':
file_path = file + '.onnx'
self._model.to(get_device())
outputs = self._model(dummy_input)
# if dynamic_axes is None:
# dynamic_axes = {}
# for inp in self.inputs.key_list:
# dynamic_axes[inp] = {0: 'batch_size'}
# for out in output_names:
# dynamic_axes[out] = {0: 'batch_size'}
torch.onnx.export(self._model, # model being run
dummy_input, # model input (or a tuple for multiple inputs)
save_path, # where to save the model (can be a file or file-like object)
export_params=True, # store the trained parameter weights inside the model file
opset_version=10, # the ONNX version to export the model to
do_constant_folding=True, # whether to execute constant folding for optimization
input_names=self.inputs.key_list, # the model's input names
output_names=self.outputs.key_list, # the model's output names
dynamic_axes= {self.inputs.key_list[0] : {0 : 'batch_size'}, # variable length axes
self.outputs.key_list[0]: {0 : 'batch_size'}})
self._model.train()
shutil.copy(save_path, save_path.replace('.onnx_', '.onnx'))
else:
raise ValueError('Only a Layer or nn.Module model can be exported to ONNX; your model is {0}'.format(type(self._model)))
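# Usage sketch (illustrative): the exporter traces the model with a random
# dummy input of shape (1, *input_shape) and marks the batch dimension of the
# first input and output as dynamic.
#   model.save_onnx('Models/my_model.onnx')         # hypothetical path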
def load_model(self, file_path):
print('Loading pretrained model from {}'.format(file_path))
pretrained_dict = torch.load(file_path, map_location=torch.device(get_device()))
if "state_dict" in pretrained_dict.keys():
pretrained_dict = pretrained_dict['state_dict']
# else:
# pretrained_dict = remove_prefix(pretrained_dict, 'module.')
if check_keys(self._model, pretrained_dict):
self._model.load_state_dict(pretrained_dict, strict=False)
print('Model loaded!')
if self.signature is None:
self.signature = get_signature(self._model.forward)
self._model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
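# Usage sketch (illustrative): load_model accepts checkpoints produced by
# save_model (a dict containing a 'state_dict' entry) as well as plain state
# dicts, and moves the model to CUDA when available.
#   model.load_model('Models/my_model.pth.tar')     # hypothetical path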
def summary(self):
# self.rebinding_input_output(self._model.input_shape)
summary(self._model, self.inputs.value_list)
def __str__(self):
return self.__repr__()
def _get_name(self):
return self.__class__.__name__
def __repr__(self):
# We treat the extra repr like the sub-module, one item per line
extra_lines = []
extra_repr = self.extra_repr()
# empty string will be split into list ['']
if extra_repr:
extra_lines = extra_repr.split('\n')
child_lines = []
for key, value in self.__dict__.items():
if isinstance(value, OrderedDict):
for subkey, subvalue in value.items():
mod_str = repr(subvalue)
mod_str = addindent(mod_str, 2)
child_lines.append('(' + key + '): ' + mod_str)
else:
mod_str = repr(value)
mod_str = addindent(mod_str, 2)
child_lines.append('(' + key + '): ' + mod_str)
lines = extra_lines + child_lines
main_str = self._get_name() + '('
if lines:
# simple one-liner info, which most builtin Modules will use
if len(extra_lines) == 1 and not child_lines:
main_str += extra_lines[0]
else:
main_str += '\n ' + '\n '.join(lines) + '\n'
main_str += ')'
return main_str
def __dir__(self):
module_attrs = dir(self._model.__class__)
optimizer_attrs = dir(self.optimizer.__class__)
attrs = list(self.__dict__.keys())
losses = list(self._losses.keys())
metrics = list(self._metrics.keys())
output_regs = list(self._output_regs.keys())
model_regs = list(self._model_regs.keys())
constraints = list(self._constraints.keys())
keys = module_attrs + optimizer_attrs + attrs + losses + metrics + output_regs + model_regs + constraints
# Eliminate attrs that are not legal Python variable names
keys = [key for key in keys if not key[0].isdigit()]
return sorted(keys)
class ImageClassificationModel(Model):
def __init__(self, inputs=None, output=None, input_shape=None):
super(ImageClassificationModel, self).__init__(inputs, output, input_shape)
self._class_names = []
self._idx2lab = {}
self._lab2idx = {}
@property
def class_names(self):
return self._class_names
@class_names.setter
def class_names(self, value):
if self._class_names != value:
self._class_names = value
self._lab2idx = {v: k for k, v in enumerate(self._class_names)}
self._idx2lab = {k: v for k, v in enumerate(self._class_names)}
def index2label(self, idx: int):
if self._idx2lab is None or len(self._idx2lab.items()) == 0:
raise ValueError('There is no proper class-name mapping.')
elif idx not in self._idx2lab:
raise ValueError('Index {0} does not exist in class names'.format(idx))
else:
return self._idx2lab[idx]
def label2index(self, label):
if self._lab2idx is None or len(self._lab2idx.items()) == 0:
raise ValueError('There is no proper class-name mapping.')
elif label not in self._lab2idx:
raise ValueError('Label {0} does not exist in class names'.format(label))
else:
return self._lab2idx[label]
def infer_single_image(self, img, topk=1):
if self._model.built:
self._model.eval()
img = image2array(img)
if img.shape[-1] == 4:
img = img[:, :, :3]
for func in self.preprocess_flow:
if inspect.isfunction(func) and func is not image_backend_adaption:
img = func(img)
img = image_backend_adaption(img)
inp = to_tensor(np.expand_dims(img, 0)).to(
torch.device("cuda" if self._model.weights[0].data.is_cuda else "cpu")).to(
self._model.weights[0].data.dtype)
result = self._model(inp)
result = to_numpy(result)[0]
if self.class_names is None or len(self.class_names)==0:
return result
else:
# argresult = np.argsort(result)
# argresult1 =argresult[::-1]
answer = OrderedDict()
idxs = list(np.argsort(result)[::-1][:topk])
for idx in idxs:
prob = result[idx]
answer[self.index2label(idx)] = (idx, prob)
# idx=int(np.argmax(result,-1)[0])
return answer
else:
raise ValueError('the model is not built yet.')
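# Usage sketch (illustrative; 'cat.jpg' is a hypothetical image path): with
# class_names set, infer_single_image returns an OrderedDict mapping the top-k
# labels to (index, probability) pairs; without class names it returns the raw
# network output.
#   model.class_names = ['cat', 'dog']
#   answer = model.infer_single_image('cat.jpg', topk=2)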
class ImageDetectionModel(Model):
def __init__(self, inputs=None, output=None, input_shape=None):
super(ImageDetectionModel, self).__init__(inputs, output, input_shape)
self.preprocess_flow = []
self.detection_threshould = 0.5
@property
def reverse_preprocess_flow(self):
return_list = []
for i in range(len(self.preprocess_flow)):
fn = self.preprocess_flow[-1 - i]
if fn.__qualname__ == 'normalize.<locals>.img_op':
return_list.append(unnormalize(fn.mean, fn.std))
return return_list
def infer_single_image(self, img, scale=1):
if self._model.built:
self._model.to(self.device)
self._model.eval()
img = image2array(img)
if img.shape[-1] == 4:
img = img[:, :, :3]
for func in self.preprocess_flow:
if inspect.isfunction(func):
img = func(img)
img = image_backend_adaption(img)
inp = to_tensor(np.expand_dims(img, 0)).to(
torch.device("cuda" if self._model.weights[0].data.is_cuda else "cpu")).to(
self._model.weights[0].data.dtype)
result = self._model(inp)
bboxes = self.generate_bboxes(*result, threshould=self.detection_threshould, scale=scale)
bboxes = self.nms(bboxes)
# idx=int(np.argmax(result,-1)[0])
return bboxes
else:
raise ValueError('the model is not built yet.')
class ImageSegmentationModel(Model):
def __init__(self, inputs=None, output=None, input_shape=None):
super(ImageSegmentationModel, self).__init__(inputs, output, input_shape)
self.preprocess_flow = []
class ImageGenerationModel(Model):
def __init__(self, inputs=None, output=None, input_shape=None):
super(ImageGenerationModel, self).__init__(inputs, output, input_shape)
self.preprocess_flow = []
@property
def reverse_preprocess_flow(self):
return_list = []
for i in range(len(self.preprocess_flow)):
fn = self.preprocess_flow[-1 - i]
if fn.__qualname__ == 'normalize.<locals>.img_op':
return_list.append(unnormalize(fn.mean, fn.std))
return return_list
def infer_single_image(self, img):
if self._model.built:
self._model.eval()
img = image2array(img)
if img.shape[-1] == 4:
img = img[:, :, :3]
for func in self.preprocess_flow:
if inspect.isfunction(func) and func is not image_backend_adaption:
img = func(img)
img = image_backend_adaption(img)
inp = to_tensor(np.expand_dims(img, 0)).to(torch.device("cuda" if self._model.weights[0].data.is_cuda else "cpu")).to(self._model.weights[0].data.dtype)
result = self._model(inp)
result = to_numpy(result)[0]
for func in self.reverse_preprocess_flow:
if inspect.isfunction(func):
result = func(result)
result = array2image(result)
return result
class FaceRecognitionModel(Model):
def __init__(self, inputs=None, output=None, input_shape=None):
super(FaceRecognitionModel, self).__init__(inputs, output, input_shape)
self._class_names = []
self.preprocess_flow = []
self._idx2lab = {}
self._lab2idx = {}
@property
def reverse_preprocess_flow(self):
return_list = []
return_list.append(reverse_image_backend_adaption)
for i in range(len(self.preprocess_flow)):
fn = self.preprocess_flow[-1 - i]
if fn.__qualname__ == 'normalize.<locals>.img_op':
return_list.append(unnormalize(fn.mean, fn.std))
return_list.append(array2image)
return return_list
def get_embedded(self, img_path):
def norm(x):
b = np.sqrt(np.sum(np.square(x)))
return x / (b if b != 0 else 1)
img = image2array(img_path)
img = resize((224, 224), keep_aspect=True)(img)
img = normalize([131.0912, 103.8827, 91.4953], [1, 1, 1])(img)
img = to_tensor(np.expand_dims(img.transpose([2, 0, 1]), 0))
embedding = self.model(img)[0]
return norm(embedding)
def infer_single_image(self, img):
def norm(x):
b = np.sqrt(np.sum(np.square(x)))
return x / (b if b != 0 else 1)
if isinstance(self._model,Layer) and self._model.built:
self._model.eval()
img = image2array(img)
if img.shape[-1] == 4:
img = img[:, :, :3]
for func in self.preprocess_flow:
if inspect.isfunction(func) and func is not image_backend_adaption:
img = func(img)
img = image_backend_adaption(img)
inp = to_tensor(np.expand_dims(img, 0)).to(torch.device("cuda" if self._model.weights[0].data.is_cuda else "cpu")).to(self._model.weights[0].data.dtype)
result = self._model(inp)[0]
embedding = to_numpy(result)
return norm(embedding)
else:
raise ValueError('the model is not built yet.')
class LanguageModel(Model):
def __init__(self, inputs=None, output=None, input_shape=None):
super(LanguageModel, self).__init__(inputs, output, input_shape)
self.preprocess_flow = []
TrainingItem = Model