DSSD理论+源码学习(2)

DSSD理论+源码学习(2)

Data_encoder 主要包括三部分:default box(即 anchor)的生成、NMS 的计算,以及对每个框进行 encode 和 decode。


Data_encoder.py源码解析

import math
import torch
import itertools
import numpy as np
import torch.nn.functional as func
from torch.autograd import Variable
import ipdb

class DataEncoder:
    """Generate default (anchor) boxes and encode/decode boxes against them.

    The encoder builds one fixed set of default boxes for a supported input
    scale, turns ground-truth boxes into regression/classification targets
    (``encode``), and turns network outputs back into corner-format boxes
    followed by non-maximum suppression (``decode``).

    Default boxes are stored as ``(cx, cy, w, h)`` in pixel units; boxes passed
    to ``iou``/``nms``/``encode`` use ``(x1, y1, x2, y2)``.
    """

    def __init__(self, scale=300):
        """Pre-compute the default boxes for the given input ``scale``.

        Args:
            scale: input image side length; one of 300, 224 or 512.

        Raises:
            ValueError: if ``scale`` is not one of the supported values.
        """
        # Per-scale hyper-parameters:
        #   steps  - pixel stride between neighbouring cells of each feature map
        #   sizes  - base side length per feature map; one extra entry because
        #            layer i also uses sizes[i + 1] for its geometric-mean box
        if scale == 300:
            self.scale = 300.0
            steps = [8, 16, 32, 64, 100, 300]
            sizes = [30, 40, 50, 60, 70, 80, 100]
            # Alternative size set left by the author:
            # [30, 60, 111, 162, 213, 264, 315]
            self.feature_map_sizes = (38, 19, 10, 5, 3, 1)
        elif scale == 224:
            self.scale = 224.0
            steps = [6, 12, 24, 48, 75, 224]
            sizes = [22, 44, 83, 121, 159, 197, 235]
            self.feature_map_sizes = (28, 14, 7, 4, 2, 1)
        elif scale == 512:
            self.scale = 512.0
            steps = [8, 16, 32, 64, 85, 128]
            sizes = [22, 44, 66, 88, 108, 128, 144]
            self.feature_map_sizes = (64, 32, 16, 8, 6, 4)
        else:
            raise ValueError('Scale not supported')

        # Shared by every supported scale.
        # variance[0] scales the centre offsets, variance[1] the log-size offsets.
        self.variance = [0.1, 0.2]
        self.aspect_ratios = ((2, ), (2, 3), (2, 3), (2, 3), (2, ), (2, ))

        self.classes = 1 + 1  # foreground + background

        boxes = []
        layers = zip(self.feature_map_sizes, steps)
        for layer, (fmsize, step) in enumerate(layers):
            size = sizes[layer]
            mid_size = math.sqrt(size * sizes[layer + 1])

            # (w, h) of every default box placed at each cell of this layer:
            # two squares, then a wide and a tall box per aspect ratio.
            shapes = [(size, size), (mid_size, mid_size)]
            for ar in self.aspect_ratios[layer]:
                root = math.sqrt(ar)
                shapes.append((size * root, size / root))
                shapes.append((size / root, size * root))

            for row, col in itertools.product(range(fmsize), repeat=2):
                cx = (col + 0.5) * step
                cy = (row + 0.5) * step
                boxes.extend((cx, cy, bw, bh) for bw, bh in shapes)

        self.default_boxes = torch.Tensor(boxes)
        # Kept for backward compatibility: despite the name this is the same
        # tensor object as ``default_boxes`` (no device transfer happens here).
        self.default_boxes_gpu = self.default_boxes

    def iou(self, box1, box2):
        '''Compute the pairwise intersection over union of two sets of boxes.

        Args:
          box1: (tensor) bounding boxes as [x1,y1,x2,y2], sized [N,4].
          box2: (tensor) bounding boxes as [x1,y1,x2,y2], sized [M,4].
        Return:
          (tensor) iou, sized [N,M].
        '''
        # Broadcast [N,1,2] against [1,M,2] to get every pair's overlap corners.
        lt = torch.max(box1[:, None, :2], box2[None, :, :2])
        rb = torch.min(box1[:, None, 2:], box2[None, :, 2:])

        wh = (rb - lt).clamp(min=0)  # disjoint pairs get zero width/height
        inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]

        area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])  # [N,]
        area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])  # [M,]

        # intersection divided by union
        return inter / (area1[:, None] + area2[None, :] - inter)

    def encode(self, bboxes, threshold_multi=0.5):
        '''Turn ground-truth boxes into per-default-box training targets.

        Every default box is matched to the ground-truth box it overlaps most.

        Args:
          bboxes: (tensor) ground truth, sized [G, 4 + C]; columns 0-3 are
            [x1,y1,x2,y2], the remaining columns are the class label(s).
          threshold_multi: (float) default boxes whose best IoU is below this
            value get their label(s) set to 0.
        Return:
          loc: (tensor) encoded offsets, sized [#default_boxes, 4].
          conf: (tensor) labels, sized [#default_boxes, C].
        '''
        boxes = bboxes[:, :4]  # ground-truth coordinates
        classes = bboxes[:, 4:]  # ground-truth labels
        default_boxes = self.default_boxes  # (cx, cy, w, h)

        if boxes.size(0) == 0:
            # No ground truth: there is nothing to match against, so every
            # default box becomes background with a zero offset.
            num_priors = default_boxes.size(0)
            return (default_boxes.new_zeros(num_priors, 4),
                    classes.new_zeros(num_priors, classes.size(1)))

        # Default boxes converted to (x1, y1, x2, y2) for the IoU computation.
        half_wh = default_boxes[:, 2:] / 2
        corners = torch.cat([default_boxes[:, :2] - half_wh,
                             default_boxes[:, :2] + half_wh], 1)

        # For each default box: the best IoU and which ground truth gives it.
        best_iou, best_gt = self.iou(boxes, corners).max(0)
        matched = boxes[best_gt]  # [#default_boxes, 4]

        # Offsets between each default box and its matched ground truth.
        center = (matched[:, :2] + matched[:, 2:]) / 2 - default_boxes[:, :2]
        center = center / (self.variance[0] * default_boxes[:, 2:])
        wide_height = (matched[:, 2:] - matched[:, :2]) / default_boxes[:, 2:]
        wide_height = torch.log(wide_height) / self.variance[1]
        loc = torch.cat([center, wide_height], 1)

        conf = classes[best_gt]  # advanced indexing copies, input is untouched
        conf[best_iou < threshold_multi] = 0  # weak matches become background
        return loc, conf

    def nms_(self, boxes, scores, overlap=0.5, top_k=5):
        """Apply non-maximum suppression at test time to avoid detecting too many
        overlapping bounding boxes for a given object.

        Only the ``top_k`` highest-scoring boxes are considered as candidates.

        Args:
            boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
            scores: (tensor) The class predscores for the img, Shape:[num_priors].
            overlap: (float) The overlap thresh for suppressing unnecessary boxes.
            top_k: (int) The Maximum number of box preds to consider.
        Return:
            The indices of the kept boxes with respect to num_priors, highest
            score first.
        """
        keep = torch.zeros(scores.size(0), dtype=torch.long,
                           device=scores.device)
        if boxes.numel() == 0:
            return keep

        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        area = (x2 - x1) * (y2 - y1)

        _, idx = scores.sort(0)  # ascending, so the best candidate is last
        idx = idx[-top_k:]  # indices of the top-k largest scores

        count = 0
        while idx.numel() > 0:
            i = idx[-1]  # index of the current highest score
            keep[count] = i
            count += 1
            if idx.size(0) == 1:
                break
            idx = idx[:-1]  # drop the box that was just kept

            # Overlap rectangle between the kept box and every remaining one.
            w = (torch.min(x2[idx], x2[i]) - torch.max(x1[idx], x1[i])).clamp(min=0.0)
            h = (torch.min(y2[idx], y2[i]) - torch.max(y1[idx], y1[i])).clamp(min=0.0)
            inter = w * h

            # IoU = inter / (area(a) + area(b) - inter)
            union = (area[idx] - inter) + area[i]
            # keep only candidates with an IoU <= overlap
            idx = idx[(inter / union).le(overlap)]

        return keep[:count]

    def nms(self, bboxes, scores, threshold=0.5, mode='union', top_k=5):
        '''Non maximum suppression (used at test time only).

        Boxes are visited from highest to lowest score; each kept box removes
        every remaining box that overlaps it by more than ``threshold``.
        Stops once ``top_k`` boxes have been kept.

        Args:
          bboxes: (tensor) bounding boxes as [x1,y1,x2,y2], sized [N,4].
          scores: (tensor) bbox scores, sized [N,] or [N,1].
          threshold: (float) overlap threshold.
          mode: (str) 'union' or 'min'.
          top_k: (int) maximum number of boxes to keep.
        Returns:
          keep: (LongTensor) selected indices into ``bboxes``, best score first.
        Raises:
          TypeError: if ``mode`` is neither 'union' nor 'min'.
        Ref:
          https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/nms/py_cpu_nms.py
        '''
        if mode not in ('union', 'min'):
            raise TypeError('Unknown nms mode: %s.' % mode)

        x1 = bboxes[:, 0]
        y1 = bboxes[:, 1]
        x2 = bboxes[:, 2]
        y2 = bboxes[:, 3]
        areas = (x2 - x1) * (y2 - y1)

        # Flatten so that ``order`` is always 1-D; keeping an [N,1] layout made
        # the survivor indices ambiguous and re-admitted suppressed boxes.
        _, order = scores.reshape(-1).sort(0, descending=True)

        keep = []
        while order.numel() > 0 and len(keep) < top_k:
            i = int(order[0])  # index of the best remaining box
            keep.append(i)
            if order.numel() == 1:
                break

            rest = order[1:]
            # Overlap rectangle between box i and every remaining box.
            w = (torch.min(x2[rest], x2[i]) - torch.max(x1[rest], x1[i])).clamp(min=0)
            h = (torch.min(y2[rest], y2[i]) - torch.max(y1[rest], y1[i])).clamp(min=0)
            inter = w * h

            if mode == 'union':
                # intersection over union
                ovr = inter / (areas[i] + areas[rest] - inter)
            else:
                # intersection over the smaller of the two areas
                ovr = inter / torch.min(areas[rest], areas[i])

            # Boolean masking keeps ``order`` 1-D for any number of survivors.
            order = rest[ovr <= threshold]

        return torch.tensor(keep, dtype=torch.long)

    def decode(self, loc, conf):
        '''Convert predicted offsets back to boxes and filter them (test time).

        Args:
          loc: (tensor) predicted offsets, sized [#default_boxes, 4].
          conf: (tensor) predicted scores, sized [#default_boxes, C]; column 0
            is used as the foreground score and compared against 0.5
            (assumed to already be a sigmoid probability, per the original
            author's note).
        Returns:
          (ndarray) sized [K, 5] with rows [score, x1, y1, x2, y2] for the
          boxes kept by ``nms``; K is 0 when no score exceeds 0.5.
        '''
        # Follow the device of the predictions so CPU and GPU inputs both work.
        priors = self.default_boxes_gpu.to(loc.device)

        # Invert the encoding performed in ``encode``.
        wh = torch.exp(loc[:, 2:] * self.variance[1]) * priors[:, 2:]
        cxcy = loc[:, :2] * self.variance[0] * priors[:, 2:] + priors[:, :2]
        boxes = torch.cat([cxcy - wh / 2, cxcy + wh / 2], 1)  # [#default_boxes,4]

        conf_pos = conf[:, 0]
        positive = conf_pos > 0.5  # [#default_boxes,]

        cand_boxes = boxes[positive]
        cand_scores = conf_pos[positive]
        # With no positives ``nms`` returns an empty index and the result is a
        # well-formed [0, 5] array, so no special case is needed.
        keep = self.nms(cand_boxes, cand_scores).to(cand_boxes.device)

        bbox = torch.cat((cand_scores[keep].unsqueeze(1), cand_boxes[keep]), 1)
        return bbox.detach().cpu().numpy()

未完待续。。。(os:这两天太累了,续不动)


你可能感兴趣的:(目标检测)