trident
trident
Table Of Contents
trident
Table Of Contents

Source code for trident.models.pytorch_yolo

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import inspect
import math
import os
import uuid
from collections import *
from collections import deque
from copy import copy, deepcopy
from functools import partial
from itertools import repeat

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch._six import container_abcs
from torch.nn import init
from torch.nn.parameter import Parameter

from trident.backend.common import *
from trident.backend.pytorch_backend import Layer, Sequential, get_device
from trident.backend.pytorch_ops import *
from trident.data.image_common import *
from trident.data.bbox_common import *
from trident.data.utils import download_model_from_google_drive
from trident.layers.pytorch_activations import get_activation, Identity, Mish, LeakyRelu
from trident.layers.pytorch_blocks import *
from trident.layers.pytorch_layers import *
from trident.layers.pytorch_normalizations import get_normalization, BatchNorm2d
from trident.layers.pytorch_pooling import *
from trident.optims.pytorch_trainer import *
from trident.misc.visualization_utils import generate_palette,plot_bbox

__all__ = [ 'yolo4_body', 'YoloDetectionModel', 'DarknetConv2D', 'DarknetConv2D_BN_Mish',
           'DarknetConv2D_BN_Leaky', 'YoloLayer']

_session = get_session()
_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
_epsilon = _session.epsilon
_trident_dir = _session.trident_dir

anchors = np.array([12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]).reshape(
    (1, 1, 1, -1, 2))



[docs]def DarknetConv2D(*args, **kwargs): """Wrapper to set Darknet parameters for Convolution2D.""" darknet_conv_kwargs = {'use_bias': True} darknet_conv_kwargs['auto_pad'] = False if kwargs.get('strides')==(2,2) else True darknet_conv_kwargs['use_bias'] = True darknet_conv_kwargs.update(kwargs) return Conv2d(*args, **darknet_conv_kwargs)
[docs]def DarknetConv2D_BN_Leaky(*args, **kwargs): """Darknet Convolution2D followed by BatchNormalization and LeakyReLU.""" darknet_conv_kwargs = {'use_bias': False,'normalization':BatchNorm2d(momentum=0.03,eps=1e-4)} darknet_conv_kwargs['activation']=LeakyRelu(alpha=0.1) darknet_conv_kwargs['auto_pad'] = False if kwargs.get('strides') == (2, 2) else True darknet_conv_kwargs.update(kwargs) return Conv2d_Block(*args, **darknet_conv_kwargs)
[docs]def DarknetConv2D_BN_Mish(*args, **kwargs): """Darknet Convolution2D followed by BatchNormalization and LeakyReLU.""" darknet_conv_kwargs = {'use_bias': False, 'normalization':BatchNorm2d(momentum=0.03,eps=1e-4), 'activation': Mish} darknet_conv_kwargs['auto_pad'] = False if kwargs.get('strides') == (2, 2) else True darknet_conv_kwargs.update(kwargs) return Conv2d_Block(*args, **darknet_conv_kwargs)
def resblock_body(num_filters, num_blocks, all_narrow=True,keep_output=False,name=''): # block=Sequential() # block.add_module(name+'_preconv1',DarknetConv2D_BN_Mish((3, 3),num_filters , strides=(2, 2),auto_pad=False, padding=((1,0),(1,0)),name=name+'_preconv1')) # shortconv=DarknetConv2D_BN_Mish((1, 1), num_filters // 2 if all_narrow else num_filters, name=name + '_shortconv') # branch=Sequential( # DarknetConv2D_BN_Mish((1, 1), num_filters // 2 if all_narrow else num_filters,name=name+'_mainconv'), # For(range(num_blocks), lambda i: # ShortCut2d( # Identity(), # Sequential( # DarknetConv2D_BN_Mish((1, 1),num_filters // 2,name=name+'_for{0}_1'.format(i)), # DarknetConv2D_BN_Mish((3, 3),num_filters // 2 if all_narrow else num_filters,name=name+'_for{0}_2'.format(i)) # ), # mode='add') # ), # DarknetConv2D_BN_Mish( (1, 1),num_filters // 2 if all_narrow else num_filters,name=name+'_postconv') # ) # block.add_module(name+'_route',ShortCut2d(branch,shortconv,mode='concate',name=name+'_route')) # block.add_module(name+'_convblock5',DarknetConv2D_BN_Mish((1,1),num_filters,name=name+'_convblock5')) # return block return Sequential( DarknetConv2D_BN_Mish((3, 3),num_filters , strides=(2, 2),auto_pad=False, padding=((1,0),(1,0)),name=name+'_preconv1'), ShortCut2d( { 1:DarknetConv2D_BN_Mish((1, 1), num_filters // 2 if all_narrow else num_filters, name=name + '_shortconv'), 0:Sequential( DarknetConv2D_BN_Mish((1, 1), num_filters // 2 if all_narrow else num_filters,name=name+'_mainconv'), For(range(num_blocks), lambda i: ShortCut2d( Identity(), Sequential( DarknetConv2D_BN_Mish((1, 1),num_filters // 2,name=name+'_for{0}_1'.format(i)), DarknetConv2D_BN_Mish((3, 3),num_filters // 2 if all_narrow else num_filters,name=name+'_for{0}_2'.format(i)) ), mode='add') ), DarknetConv2D_BN_Mish( (1, 1),num_filters // 2 if all_narrow else num_filters,name=name+'_postconv') )}, mode='concate',name=name+'_route'), DarknetConv2D_BN_Mish((1,1),num_filters,name=name+'_convblock5') ) def darknet_body(): return Sequential( DarknetConv2D_BN_Mish((3, 3), 32), resblock_body(64, 1, all_narrow=False), resblock_body(128, 2), resblock_body(256, 8), resblock_body(512, 8), resblock_body(1024, 4) )
[docs]def yolo4_body(num_classes=80,image_size=608): anchors1 = to_tensor(np.array([12, 16, 19, 36, 40, 28]).reshape(-1, 2),requires_grad=False) anchors2 = to_tensor(np.array([36, 75, 76, 55, 72, 146]).reshape(-1, 2),requires_grad=False) anchors3 = to_tensor(np.array([142, 110, 192, 243, 459, 401]).reshape(-1, 2),requires_grad=False) num_anchors=len(anchors1) """Create YOLO_V4 model CNN body in Keras.""" return Sequential( DarknetConv2D_BN_Mish((3, 3), 32,name='first_layer'), resblock_body(64, 1, all_narrow=False,name='block64'), resblock_body(128, 2,name='block128'), resblock_body(256, 8,name='block256'), ShortCut2d( { 1:Sequential( resblock_body(512, 8,name='block512'), ShortCut2d( { 1:Sequential( resblock_body(1024, 4, name='block1024'), DarknetConv2D_BN_Leaky( (1,1), 512,name='pre_maxpool1'), DarknetConv2D_BN_Leaky( (3, 3),1024,name='pre_maxpool2'), DarknetConv2D_BN_Leaky((1,1),512,name='pre_maxpool3'), ShortCut2d( MaxPool2d((13,13),strides=(1,1),auto_pad=True), MaxPool2d((9,9), strides=(1, 1), auto_pad=True), MaxPool2d((5,5), strides=(1, 1), auto_pad=True), Identity(), mode='concate' ), DarknetConv2D_BN_Leaky((1, 1), 512,name='pre_y19_1'), DarknetConv2D_BN_Leaky((3, 3), 1024,name='pre_y19_2'), DarknetConv2D_BN_Leaky((1, 1), 512,name='y_19',keep_output=True), DarknetConv2D_BN_Leaky((1, 1),256,name='pre_y19_upsample'), Upsampling2d(scale_factor=2,name='y19_upsample'), ), 0:DarknetConv2D_BN_Leaky((1, 1), 256) },mode='concate'), DarknetConv2D_BN_Leaky((1, 1),256,name='pre_y38_1'), DarknetConv2D_BN_Leaky((3, 3),512,name='pre_y38_2'), DarknetConv2D_BN_Leaky((1, 1),256,name='pre_y38_3'), DarknetConv2D_BN_Leaky((3, 3),512,name='pre_y38_4'), DarknetConv2D_BN_Leaky((1, 1),256,name='y_38',keep_output=True), DarknetConv2D_BN_Leaky((1, 1),128,name='pre_y_38_upsample'), Upsampling2d(scale_factor=2,name='y_38_upsample'), ), 0:DarknetConv2D_BN_Leaky((1, 1), 128) }, mode='concate'), DarknetConv2D_BN_Leaky((1, 1), 128,name='pre_y76_concate1'), DarknetConv2D_BN_Leaky((3, 3), 256,name='pre_y76_concate2'), DarknetConv2D_BN_Leaky((1, 1), 128,name='pre_y76_concate3'), DarknetConv2D_BN_Leaky((3, 3), 256,name='pre_y76_concate4'), DarknetConv2D_BN_Leaky((1, 1), 128,name='pre_y76_concate5'), ShortCut2d( #y76_output Sequential( DarknetConv2D_BN_Leaky( (3, 3),256,name='pre_y76_output'), DarknetConv2D( (1, 1),num_anchors * (num_classes + 5),use_bias=True,name='y76_output'), YoloLayer(anchors=anchors1,num_classes=num_classes,grid_size=76, img_dim=image_size), name='y76_output'), # y38_output Sequential( ShortCut2d( DarknetConv2D_BN_Leaky((3, 3), 256, strides=(2, 2), auto_pad=False, padding=((1, 0), (1, 0)),name='y76_downsample'), branch_from='y_38',mode='concate'), DarknetConv2D_BN_Leaky((1, 1), 256,name='pre_y38_concate1'), DarknetConv2D_BN_Leaky((3, 3), 512,name='pre_y38_concate2'), DarknetConv2D_BN_Leaky((1, 1), 256,name='pre_y38_concate3'), DarknetConv2D_BN_Leaky((3, 3), 512,name='pre_y38_concate4'), DarknetConv2D_BN_Leaky((1, 1), 256,name='pre_y38_concate5'), ShortCut2d( Sequential( DarknetConv2D_BN_Leaky((3, 3), 512, name='pre_y38_output'), DarknetConv2D((1, 1), num_anchors * (num_classes + 5), use_bias=True, name='y38_output'), YoloLayer(anchors=anchors2, num_classes=num_classes,grid_size=38, img_dim=image_size), name='y38_output'), Sequential( ShortCut2d( DarknetConv2D_BN_Leaky((3, 3), 512, strides=(2, 2), auto_pad=False, padding=((1, 0), (1, 0)),name='y38_downsample'), branch_from='y_19', mode='concate'), DarknetConv2D_BN_Leaky((1, 1), 512,name='pre_y19_concate1'), DarknetConv2D_BN_Leaky((3, 3), 1024,name='pre_y19_concate2'), DarknetConv2D_BN_Leaky((1, 1), 512,name='pre_y19_concate3'), DarknetConv2D_BN_Leaky((3, 3), 1024,name='pre_y19_concate4'), DarknetConv2D_BN_Leaky((1, 1), 512,name='pre_y19_concate5'), Sequential( DarknetConv2D_BN_Leaky((3, 3),1024,name='pre_y19_output'), DarknetConv2D((1, 1), num_anchors * (num_classes + 5),use_bias=True,name='y19_output'), YoloLayer(anchors=anchors3,num_classes=num_classes,grid_size=19, img_dim=image_size), name='y19_output')), mode='concate') ) ,mode = 'concate') )
[docs]class YoloLayer(Layer): """Detection layer""" def __init__(self, anchors, num_classes,grid_size, img_dim=608): super(YoloLayer, self).__init__() self.register_buffer('grid', None) self.register_buffer('anchors', to_tensor(anchors, requires_grad=False).to(get_device())) self.num_anchors = len(anchors) self.num_classes = num_classes self.ignore_thres = 0.5 self.mse_loss = nn.MSELoss() self.bce_loss = nn.BCELoss() self.obj_scale = 1 self.noobj_scale = 100 self.metrics = {} self.img_dim = img_dim self.grid_size = grid_size # grid size self.compute_grid_offsets(grid_size)
[docs] def compute_grid_offsets(self, grid_size): self.stride = self.img_dim / grid_size self.anchor_vec = self.anchors / self.stride self.anchor_wh = self.anchor_vec.view(1,self.num_anchors, 1, 1, 2) yv, xv = torch.meshgrid([torch.arange(grid_size, device=get_device()), torch.arange(grid_size, device=get_device())]) self.grid = torch.stack((xv, yv), 2).view((1, 1, grid_size, grid_size, 2)).float() self.grid1=meshgrid(grid_size,grid_size,requires_grad=False).view([1, 1, grid_size,grid_size,2])
# Calculate offsets for each grid
[docs] def forward(self, x, targets=None): num_samples = x.size(0) grid_size = x.size(2) # # If grid size does not match current we compute new offsets # if grid_size != self.grid_size: # self.compute_grid_offsets(grid_size, cuda=x.is_cuda) prediction = x.clone().view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size).permute(0, 1, 3, 4, 2).contiguous() # Get outputs xy = sigmoid(prediction[..., 0:2]) # Center x wh = prediction[..., 2:4] # Width pred_conf = sigmoid(prediction[..., 4]) # Conf pred_cls = sigmoid(prediction[..., 5:]) # Cls pred. cls_probs=reduce_max(pred_cls,-1,keepdims=True) # Add offset and scale with anchors pred_boxes = zeros_like(prediction[..., :4]) pred_boxes[..., 0:2] = xy + self.grid.to(get_device()) pred_boxes[..., 2:4] = exp(wh) * self.anchor_wh.to(get_device()) output = torch.cat((pred_boxes.view(num_samples, -1, 4) * self.stride, pred_conf.view(num_samples, -1, 1), pred_cls.view(num_samples, -1, self.num_classes),), -1, ) return output
# if targets is None: # return output, # else: # iou_scores, class_mask, obj_mask, # noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets( # pred_boxes=pred_boxes, # pred_cls=pred_cls, # target=targets, # anchors=self.scaled_anchors, # ignore_thres=self.ignore_thres, # ) # # # Loss : Mask outputs to ignore non-existing # objects (except with conf. loss) # loss_x = self.mse_loss(x[obj_mask], tx[obj_mask]) # # loss_y = self.mse_loss(y[obj_mask], ty[obj_mask]) # loss_w = self.mse_loss(w[obj_mask], # tw[obj_mask]) # loss_h = self.mse_loss(h[obj_mask], th[obj_mask]) # loss_conf_obj = # self.bce_loss(pred_conf[obj_mask], tconf[obj_mask]) # loss_conf_noobj = self.bce_loss( # pred_conf[noobj_mask], tconf[noobj_mask]) # loss_conf = self.obj_scale * loss_conf_obj + # self.noobj_scale * loss_conf_noobj # loss_cls = self.bce_loss(pred_cls[obj_mask], # tcls[obj_mask]) # total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls # # # Metrics # cls_acc = 100 * class_mask[obj_mask].mean() # conf_obj = pred_conf[obj_mask].mean() # # conf_noobj = pred_conf[noobj_mask].mean() # conf50 = (pred_conf > 0.5).float() # iou50 = ( # iou_scores > 0.5).float() # iou75 = (iou_scores > 0.75).float() # detected_mask = conf50 * # class_mask * tconf # precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16) # # recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16) # recall75 = torch.sum(iou75 * # detected_mask) / (obj_mask.sum() + 1e-16) # # self.metrics = { # "loss": to_numpy( # total_loss).item(), # "x":to_numpy(loss_x).item(), # "y": to_numpy(loss_y).item(), # "w": to_numpy(loss_w).item(), # "h": to_numpy(loss_h).item(), # "conf": to_cpu( # loss_conf).item(), # "cls": to_numpy(loss_cls).item(), # "cls_acc": to_cpu( # cls_acc).item(), # "recall50": to_cpu(recall50).item(), # "recall75": to_cpu( # recall75).item(), # "precision": to_cpu(precision).item(), # "conf_obj": to_cpu( # conf_obj).item(), # "conf_noobj": to_cpu(conf_noobj).item(), # "grid_size": grid_size, # } # # return output, total_loss
[docs]class YoloDetectionModel(ImageDetectionModel): def __init__(self, inputs=None, output=None, input_shape=None): super(YoloDetectionModel, self).__init__(inputs, output, input_shape) self.preprocess_flow = [resize((input_shape[-2], input_shape[-1]), True), normalize(0, 255)] self.detection_threshold = 0.7 self.iou_threshold = 0.3 self.class_names = None self.palette = generate_palette(80)
[docs] def area_of(self, left_top, right_bottom): """Compute the areas of rectangles given two corners. Args: left_top (N, 2): left top corner. right_bottom (N, 2): right bottom corner. Returns: area (N): return the area. """ hw = np.clip(right_bottom - left_top, 0.0, None) return hw[..., 0] * hw[..., 1]
[docs] def iou_of(self, boxes0, boxes1, eps=1e-5): """Return intersection-over-union (Jaccard index) of boxes. Args: boxes0 (N, 4): ground truth boxes. boxes1 (N or 1, 4): predicted boxes. eps: a small number to avoid 0 as denominator. Returns: iou (N): IoU values. """ overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2]) overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:]) overlap_area = self.area_of(overlap_left_top, overlap_right_bottom) area0 = self.area_of(boxes0[..., :2], boxes0[..., 2:]) area1 = self.area_of(boxes1[..., :2], boxes1[..., 2:]) return overlap_area / (area0 + area1 - overlap_area + eps)
[docs] def hard_nms(self, box_scores, iou_threshold, top_k=-1, candidate_size=200): """ Args: box_scores (N, 5): boxes in corner-form and probabilities. iou_threshold: intersection over union threshold. top_k: keep top_k results. If k <= 0, keep all the results. candidate_size: only consider the candidates with the highest scores. Returns: picked: a list of indexes of the kept boxes """ if box_scores is None or len(box_scores) == 0: return None, None scores = box_scores[:, -1] boxes = box_scores[:, :4] picked = [] # _, indexes = scores.sort(descending=True) indexes = np.argsort(scores) # indexes = indexes[:candidate_size] indexes = indexes[-candidate_size:] while len(indexes) > 0: # current = indexes[0] current = indexes[-1] picked.append(current) if 0 < top_k == len(picked) or len(indexes) == 1: break current_box = boxes[current, :] # indexes = indexes[1:] indexes = indexes[:-1] rest_boxes = boxes[indexes, :] iou = self.iou_of(rest_boxes, np.expand_dims(current_box, axis=0), ) indexes = indexes[iou <= iou_threshold] return box_scores[picked, :], picked
[docs] def nms(self, boxes, threshold=0.3): # if there are no boxes, return an empty list if len(boxes) == 0: return [] # initialize the list of picked indexes pick = [] # grab the coordinates of the bounding boxes x1 = boxes[:, 0] y1 = boxes[:, 1] x2 = boxes[:, 2] y2 = boxes[:, 3] # compute the area of the bounding boxes and sort the bounding # boxes by the bottom-right y-coordinate of the bounding box area = (x2 - x1 + 1) * (y2 - y1 + 1) idxs = np.argsort(y2) # keep looping while some indexes still remain in the indexes # list while len(idxs) > 0: # grab the last index in the indexes list, add the index # value to the list of picked indexes, then initialize # the suppression list (i.e. indexes that will be deleted) # using the last index last = len(idxs) - 1 i = idxs[last] pick.append(i) suppress = [last] # loop over all indexes in the indexes list for pos in range(0, last): # grab the current index j = idxs[pos] # find the largest (x, y) coordinates for the start of # the bounding box and the smallest (x, y) coordinates # for the end of the bounding box xx1 = max(x1[i], x1[j]) yy1 = max(y1[i], y1[j]) xx2 = min(x2[i], x2[j]) yy2 = min(y2[i], y2[j]) # compute the width and height of the bounding box w = max(0, xx2 - xx1 + 1) h = max(0, yy2 - yy1 + 1) # compute the ratio of overlap between the computed # bounding box and the bounding box in the area list overlap = float(w * h) / area[j] # if there is sufficient overlap, suppress the # current bounding box if overlap > threshold: suppress.append(pos) # delete all indexes from the index list that are in the # suppression list idxs = np.delete(idxs, suppress) # return only the bounding boxes that were picked return boxes[pick], pick
[docs] def infer_single_image(self, img, scale=1, verbose=False): if self._model.built: try: self._model.to(self.device) self._model.eval() img = image2array(img) if img.shape[-1] == 4: img = img[:, :, :3] img_orig = img.copy() for func in self.preprocess_flow: if inspect.isfunction(func): img = func(img) if func.__qualname__ == 'resize.<locals>.img_op': scale = func.scale img = image_backend_adaption(img) inp = to_tensor(np.expand_dims(img, 0)).to(self.device).to(self._model.weights[0].data.dtype) boxes = self._model(inp)[0] if verbose: print(min(boxes[:, 4]),max(boxes[:, 4])) mask = boxes[:, 4] > self.detection_threshold boxes = boxes[mask] if verbose: print('detection threshold:{0}'.format(self.detection_threshold)) print('{0} bboxes keep!'.format(len(boxes))) if boxes is not None and len(boxes) > 0: boxes = to_numpy(boxes) boxes = np.concatenate([xywh2xyxy(boxes[:, :4]), boxes[:, 4:]], axis=-1) if len(boxes) > 1: box_probs, keep = self.hard_nms(boxes[:, :5], iou_threshold=self.iou_threshold, top_k=-1, ) boxes = boxes[keep] print('iou threshold:{0}'.format(self.iou_threshold)) print('{0} bboxes keep!'.format(len(boxes))) boxes[:, :4] /=scale # boxes = boxes * (1 / scale[0]) return img_orig, boxes[:, :4], np.argmax(boxes[:, 5:], -1).astype(np.int32), boxes[:, 4] else: return img_orig, None, None, None except: PrintException() else: raise ValueError('the model is not built yet.')
[docs] def infer_then_draw_single_image(self, img, scale=1, verbose=False): rgb_image, boxes, labels, probs = self.infer_single_image(img, scale, verbose) if verbose and boxes is not None: for i in range(len(boxes)): print('box{0}: {1} prob:{2:.2%} class:{3}'.format(i, [round(num,4) for num in boxes[i].tolist()], probs[i], labels[i] if self.class_names is None or i >= len( self.class_names) else self.class_names[ int(labels[i])])) if boxes is not None and len(boxes) > 0: boxes = np.round(boxes).astype(np.int32) if boxes.ndim == 1: boxes = np.expand_dims(boxes, 0) if labels.ndim == 0: labels = np.expand_dims(labels, 0) for m in range(len(boxes)): this_box = boxes[m] this_label = labels[m] thiscolor=tuple([int(c) for c in self.palette[this_label][:3]]) rgb_image=plot_bbox(this_box,rgb_image,thiscolor,self.class_names[int(labels[m])]) # cv2.rectangle(bgr_image, (this_box[0], this_box[1]), (this_box[2], this_box[3]), # self.palette[this_label], # 1 if bgr_image.shape[1] < 480 else 2 if bgr_image.shape[1] < 640 else 3 if # bgr_image.shape[1] < 960 else 4) #rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_RGB2BGR) return rgb_image