Data_encoder 主要包括三部分:proposal box(即 SSD 中的 default box / anchor)的生成、NMS 的计算,以及对每个框进行 encode 和 decode。
Data_encoder.py源码解析
import math
import torch
import itertools
import numpy as np
import torch.nn.functional as func
from torch.autograd import Variable
import ipdb
class DataEncoder:
    """SSD-style default-box generator, target encoder and prediction decoder.

    The encoder pre-computes one fixed set of default boxes (anchors) for a
    supported input scale and offers:

    * ``iou``    - pairwise intersection-over-union of two box sets,
    * ``encode`` - ground-truth boxes -> per-anchor regression/label targets,
    * ``nms_`` / ``nms`` - two non-maximum-suppression variants,
    * ``decode`` - predicted offsets/scores -> final scored boxes.

    Default boxes are stored in input-pixel units as ``(cx, cy, w, h)``;
    all other boxes handled by this class are corner-form ``(x1, y1, x2, y2)``.
    """

    # Anchor layout per supported input scale:
    #   steps             - stride in input pixels between neighbouring cells
    #                       of each feature map
    #   sizes             - base anchor edge length per feature map; holds one
    #                       extra entry because each map also uses the next size
    #   feature_map_sizes - side length (in cells) of each square feature map
    _SCALE_CONFIGS = {
        300: {
            'steps': (8, 16, 32, 64, 100, 300),
            'sizes': (30, 40, 50, 60, 70, 80, 100),
            'feature_map_sizes': (38, 19, 10, 5, 3, 1),
        },
        224: {
            'steps': (6, 12, 24, 48, 75, 224),
            'sizes': (22, 44, 83, 121, 159, 197, 235),
            'feature_map_sizes': (28, 14, 7, 4, 2, 1),
        },
        512: {
            'steps': (8, 16, 32, 64, 85, 128),
            'sizes': (22, 44, 66, 88, 108, 128, 144),
            'feature_map_sizes': (64, 32, 16, 8, 6, 4),
        },
    }

    # Extra width/height ratios per feature map (identical for every scale).
    _ASPECT_RATIOS = ((2,), (2, 3), (2, 3), (2, 3), (2,), (2,))

    def __init__(self, scale=300):
        """Build the default boxes for the given input scale.

        Args:
          scale: input image side length; one of 300, 224 or 512.

        Raises:
          ValueError: if ``scale`` is not one of the supported values.
        """
        try:
            config = self._SCALE_CONFIGS[scale]
        except (KeyError, TypeError):
            raise ValueError('Scale not supported') from None

        self.scale = float(scale)
        # Scaling factors for the (centre, size) offsets between a ground-truth
        # box and its default box.
        self.variance = [0.1, 0.2]
        self.aspect_ratios = self._ASPECT_RATIOS
        self.feature_map_sizes = config['feature_map_sizes']
        self.classes = 1 + 1  # foreground + background

        boxes = self._build_default_boxes(
            config['steps'],
            config['sizes'],
            self.feature_map_sizes,
            self.aspect_ratios,
        )
        self.default_boxes = torch.tensor(boxes, dtype=torch.get_default_dtype())
        # Kept for backward compatibility: despite its name this is the same
        # tensor as ``default_boxes``; ``decode`` moves it to the device of
        # its input when needed.
        self.default_boxes_gpu = self.default_boxes

    @staticmethod
    def _build_default_boxes(steps, sizes, feature_map_sizes, aspect_ratios):
        """Return the list of ``(cx, cy, w, h)`` default boxes for all feature maps."""
        boxes = []
        for layer, fmsize in enumerate(feature_map_sizes):
            step = steps[layer]
            size = sizes[layer]
            # Second square anchor: geometric mean of this and the next size.
            mid_size = math.sqrt(size * sizes[layer + 1])
            for row, col in itertools.product(range(fmsize), repeat=2):
                cx = (col + 0.5) * step
                cy = (row + 0.5) * step
                boxes.append((cx, cy, size, size))
                boxes.append((cx, cy, mid_size, mid_size))
                # One wide and one tall anchor per extra aspect ratio.
                for ratio in aspect_ratios[layer]:
                    root = math.sqrt(ratio)
                    boxes.append((cx, cy, size * root, size / root))
                    boxes.append((cx, cy, size / root, size * root))
        return boxes

    def iou(self, box1, box2):
        """Compute the intersection over union of two sets of boxes.

        Each box is ``[x1, y1, x2, y2]``.

        Args:
          box1: (tensor) bounding boxes, sized [N,4].
          box2: (tensor) bounding boxes, sized [M,4].

        Return:
          (tensor) iou, sized [N,M].
        """
        # Broadcast [N,1,2] against [1,M,2] to get every pairwise corner.
        lt = torch.max(box1[:, None, :2], box2[None, :, :2])
        rb = torch.min(box1[:, None, 2:], box2[None, :, 2:])
        wh = (rb - lt).clamp(min=0)  # disjoint boxes -> zero overlap
        inter = wh[..., 0] * wh[..., 1]  # [N,M]

        area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])  # [N,]
        area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])  # [M,]
        # intersection / union
        return inter / (area1[:, None] + area2[None, :] - inter)

    def encode(self, bboxes, threshold_multi=0.5):
        """Turn ground-truth boxes into per-default-box training targets.

        Every default box is matched to the ground-truth box it overlaps
        most; default boxes whose best IoU is below ``threshold_multi`` get a
        confidence target of 0.

        Args:
          bboxes: (tensor) ground truth, sized [G, 4+C]; columns 0-3 are
            ``x1, y1, x2, y2`` and the remaining columns are the labels.
          threshold_multi: (float) minimum IoU for a positive match.

        Return:
          loc: (tensor) variance-scaled (centre, log-size) offsets, sized [D,4].
          conf: (tensor) label targets, sized [D,C].
          (D is the number of default boxes.)
        """
        default_boxes = self.default_boxes.to(bboxes.device)
        num_default = default_boxes.size(0)

        # No ground truth: every default box is background. (Taking the max
        # over an empty dimension below would raise otherwise.)
        if bboxes.numel() == 0:
            has_labels = bboxes.dim() == 2 and bboxes.size(1) > 4
            num_conf = bboxes.size(1) - 4 if has_labels else 1
            return (
                default_boxes.new_zeros((num_default, 4)),
                bboxes.new_zeros((num_default, num_conf)),
            )

        boxes = bboxes[:, :4]
        classes = bboxes[:, 4:]

        # Default boxes converted from (cx, cy, w, h) to corner form.
        default_corners = torch.cat(
            [default_boxes[:, :2] - default_boxes[:, 2:] / 2,
             default_boxes[:, :2] + default_boxes[:, 2:] / 2], 1)
        # For each default box: best IoU and index of the ground truth giving it.
        best_iou, best_gt = self.iou(boxes, default_corners).max(0)
        matched = boxes[best_gt]  # [D,4]

        # Offsets between each default box and its matched ground truth.
        center = (matched[:, :2] + matched[:, 2:]) / 2 - default_boxes[:, :2]
        center = center / (self.variance[0] * default_boxes[:, 2:])
        size = (matched[:, 2:] - matched[:, :2]) / default_boxes[:, 2:]
        size = torch.log(size) / self.variance[1]
        loc = torch.cat([center, size], 1)

        # Advanced indexing copies, so the caller's tensor is not modified.
        conf = classes[best_gt]
        conf[best_iou < threshold_multi] = 0
        return loc, conf

    def nms_(self, boxes, scores, overlap=0.5, top_k=5):
        """Apply non-maximum suppression to the ``top_k`` best-scoring boxes.

        Unlike ``nms``, the candidate set is truncated to the ``top_k``
        highest scores *before* suppression.

        Args:
          boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
          scores: (tensor) The class predscores for the img, Shape:[num_priors].
          overlap: (float) The overlap thresh for suppressing unnecessary boxes.
          top_k: (int) The Maximum number of box preds to consider.

        Return:
          (tensor) indices of the kept boxes with respect to num_priors,
          ordered by descending score.
        """
        keep = torch.zeros(scores.size(0), dtype=torch.long, device=scores.device)
        if boxes.numel() == 0:
            return keep
        if top_k <= 0:
            # ``idx[-0:]`` would select every box instead of none.
            return keep[:0]

        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        area = (x2 - x1) * (y2 - y1)

        _, idx = scores.sort(0)  # ascending order
        idx = idx[-top_k:]  # indices of the top-k largest scores

        count = 0
        while idx.numel() > 0:
            i = idx[-1].item()  # index of the current best score
            keep[count] = i
            count += 1
            if idx.size(0) == 1:
                break
            idx = idx[:-1]  # drop the box that was just kept

            # Intersection of the kept box with every remaining candidate.
            xx1 = x1[idx].clamp(min=x1[i].item())
            yy1 = y1[idx].clamp(min=y1[i].item())
            xx2 = x2[idx].clamp(max=x2[i].item())
            yy2 = y2[idx].clamp(max=y2[i].item())
            w = (xx2 - xx1).clamp(min=0.0)
            h = (yy2 - yy1).clamp(min=0.0)
            inter = w * h

            # IoU = inter / (area(a) + area(b) - inter)
            union = (area[idx] - inter) + area[i]
            iou = inter / union
            # Keep only candidates that do not overlap the kept box too much.
            idx = idx[iou.le(overlap)]
        return keep[:count]

    def nms(self, bboxes, scores, threshold=0.5, mode='union', top_k=5):
        """Greedy non-maximum suppression, used at test time.

        Args:
          bboxes: (tensor) bounding boxes, sized [N,4].
          scores: (tensor) bbox scores, sized [N,] (a [N,1] column is
            accepted and flattened).
          threshold: (float) overlap threshold.
          mode: (str) 'union' (IoU) or 'min' (intersection over the smaller
            area).
          top_k: (int) maximum number of boxes to keep.

        Returns:
          keep: (LongTensor) selected indices, ordered by descending score.

        Raises:
          TypeError: if ``mode`` is unknown and an overlap has to be computed.

        Ref:
          https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/nms/py_cpu_nms.py
        """
        # Work on a flat score vector so ``order`` stays 1-D throughout.
        scores = scores.reshape(-1)

        x1 = bboxes[:, 0]
        y1 = bboxes[:, 1]
        x2 = bboxes[:, 2]
        y2 = bboxes[:, 3]
        areas = (x2 - x1) * (y2 - y1)

        _, order = scores.sort(0, descending=True)  # best score first

        keep = []
        while order.numel() > 0 and len(keep) < top_k:
            i = order[0].item()  # index of the best remaining score
            keep.append(i)
            if order.numel() == 1:
                break

            rest = order[1:]
            # clamp(min=b) is an element-wise max(a, b); clamp(max=b) a min.
            xx1 = x1[rest].clamp(min=x1[i].item())
            yy1 = y1[rest].clamp(min=y1[i].item())
            xx2 = x2[rest].clamp(max=x2[i].item())
            yy2 = y2[rest].clamp(max=y2[i].item())

            w = (xx2 - xx1).clamp(min=0)
            h = (yy2 - yy1).clamp(min=0)
            inter = w * h

            if mode == 'union':
                ovr = inter / (areas[i] + areas[rest] - inter)
            elif mode == 'min':
                ovr = inter / areas[rest].clamp(max=areas[i].item())
            else:
                raise TypeError('Unknown nms mode: %s.' % mode)

            # A boolean mask keeps ``order`` 1-D whether zero, one or many
            # candidates survive; no index reshaping is needed.
            order = rest[ovr <= threshold]
        return torch.tensor(keep, dtype=torch.long)

    def decode(self, loc, conf, score_threshold=0.5, nms_threshold=0.5, top_k=5):
        """Convert predicted offsets back to boxes and filter them (test time).

        Args:
          loc: (tensor) predicted offsets, sized [D,4], in the format
            produced by ``encode``.
          conf: (tensor) predicted scores, sized [D,C]; column 0 is used as
            the foreground score (expected to be a probability, e.g. after a
            sigmoid).
          score_threshold: (float) minimum foreground score for a candidate.
          nms_threshold: (float) IoU threshold passed to ``nms``.
          top_k: (int) maximum number of boxes returned.

        Return:
          (numpy.ndarray) sized [K,5] with rows ``score, x1, y1, x2, y2``
          sorted by descending score; shape (0, 5) when nothing passes the
          score threshold.
        """
        default_boxes = self.default_boxes_gpu.to(loc.device)

        # Invert the encoding: offsets -> (cx, cy, w, h) -> corner form.
        wh = torch.exp(loc[:, 2:] * self.variance[1]) * default_boxes[:, 2:]
        cxcy = loc[:, :2] * self.variance[0] * default_boxes[:, 2:] + default_boxes[:, :2]
        boxes = torch.cat([cxcy - wh / 2, cxcy + wh / 2], 1)  # [D,4]

        conf_pos = conf[:, 0]
        positive = conf_pos > score_threshold  # boolean mask, sized [D,]
        if not bool(positive.any()):
            # Same type and column layout as the regular result.
            return boxes.new_zeros((0, 5)).cpu().numpy()

        pos_boxes = boxes[positive]
        pos_scores = conf_pos[positive]
        keep = self.nms(pos_boxes, pos_scores, threshold=nms_threshold, top_k=top_k)
        keep = keep.to(pos_boxes.device)

        bbox = torch.cat((pos_scores[keep].unsqueeze(1), pos_boxes[keep]), 1)
        return bbox.detach().cpu().numpy()
未完待续。。。(os:这两天太累了,续不动)