续(一)
作为小白博主,这里有必要先了解下机器学习中老生常谈的正则化的含义。
所谓正则化就是结构风险最小化策略的实现:在经验风险上加一个正则项(罚项)。常见的正则项包括L1正则化(L1范数,对应Lasso回归)和L2正则化(L2范数,对应岭回归)。
VGG网络的conv4_3特征图大小为38×38,该层在网络中比较靠前,特征的norm较大,因此加一个L2归一化(L2 Normalization,即下面的L2Norm层;注意它是对特征做归一化,与上面所说的正则化罚项不是同一个概念),可以防止它与后面的检测层差异太大。
class L2Norm(nn.Module):
    """Normalise the input by its L2 norm over dim 1, then rescale per channel.

    Args:
        n_channels: length of the learnable ``weight`` vector (one entry per
            channel of the input).
        scale: constant every entry of ``weight`` is initialised to.
    """

    def __init__(self, n_channels, scale):
        super(L2Norm, self).__init__()
        self.n_channels = n_channels
        # A falsy scale (0, None, ...) is stored as None.
        self.gamma = scale or None
        self.eps = 1e-10
        # Wrapping the raw tensor in nn.Parameter registers it as trainable.
        self.weight = nn.Parameter(torch.Tensor(self.n_channels))
        self.reset_parameters()

    def reset_parameters(self):
        """Fill every entry of ``weight`` with ``gamma``."""
        init.constant_(self.weight, self.gamma)

    def forward(self, x):
        """Return ``weight * x / (||x||_2 + eps)``, the norm taken over dim 1."""
        squared_sum = torch.sum(x.pow(2), dim=1, keepdim=True)
        # eps is added after the square root so the divisor is strictly positive.
        l2 = torch.sqrt(squared_sum) + self.eps
        normalised = torch.div(x, l2)
        # Lift weight from (C,) to (1, C, 1, 1) and broadcast it to x's shape.
        per_channel = self.weight[None, :, None, None].expand_as(x)
        return per_channel * normalised
#utils/augmentation.py
def jaccard_numpy(box_a, box_b):
    """Return the Jaccard overlap (intersection over union) of boxes vs. one box.

    Args:
        box_a: array indexed as ``box_a[:, k]`` for k in 0..3; presumably
            shape [num_boxes, 4] laid out as (x_min, y_min, x_max, y_max).
        box_b: a single box indexed as ``box_b[k]`` with the same layout.

    Returns:
        ``intersection / union`` for each row of ``box_a`` against ``box_b``.
    """
    overlap = intersect(box_a, box_b)
    span_x = box_a[:, 2] - box_a[:, 0]
    span_y = box_a[:, 3] - box_a[:, 1]
    area_a = span_x * span_y
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    # The intersection is counted in both areas, so subtract it once.
    combined = area_a + area_b - overlap
    return overlap / combined
class Compose(object):
    """Chain several augmentations into a single callable.

    Args:
        transforms (List[Transform]): callables applied in order; each is
            called as ``t(img, boxes, labels)`` and must return a 3-tuple.

    Example:
        >>> augmentations.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, boxes=None, labels=None):
        """Feed ``(img, boxes, labels)`` through every transform in turn."""
        sample = (img, boxes, labels)
        for step in self.transforms:
            img, boxes, labels = step(*sample)
            sample = (img, boxes, labels)
        return sample
class ConvertFromInts(object):
    """Cast the image to float32; boxes and labels are passed through."""

    def __call__(self, image, boxes=None, labels=None):
        as_float = image.astype(np.float32)
        return as_float, boxes, labels
class SubtractMeans(object):
    """Subtract a fixed mean from the image and return it as float32.

    Args:
        mean: value(s) converted to a float32 array and subtracted from the
            image via NumPy broadcasting.
    """

    def __init__(self, mean):
        self.mean = np.array(mean, dtype=np.float32)

    def __call__(self, image, boxes=None, labels=None):
        # astype returns a copy, so the caller's image is left unmodified.
        centred = image.astype(np.float32)
        centred -= self.mean
        return centred.astype(np.float32), boxes, labels
class ToAbsoluteCoords(object):
    """Scale box columns 0/2 by the image width and 1/3 by the image height.

    ``boxes`` is modified in place and the same object is returned.
    """

    def __call__(self, image, boxes=None, labels=None):
        rows, cols, _channels = image.shape
        # Columns 0 and 2 are multiplied by width, columns 1 and 3 by height.
        boxes[:, 0] *= cols
        boxes[:, 1] *= rows
        boxes[:, 2] *= cols
        boxes[:, 3] *= rows
        return image, boxes, labels
class ToPercentCoords(object):
    """Divide box columns 0/2 by the image width and 1/3 by the image height.

    ``boxes`` is modified in place and the same object is returned.
    """

    def __call__(self, image, boxes=None, labels=None):
        rows, cols, _channels = image.shape
        # Columns 0 and 2 are divided by width, columns 1 and 3 by height.
        boxes[:, 0] /= cols
        boxes[:, 1] /= rows
        boxes[:, 2] /= cols
        boxes[:, 3] /= rows
        return image, boxes, labels
class Resize(object):
    """Resize the image to a ``size`` x ``size`` square with ``cv2.resize``.

    Boxes and labels are returned unchanged.
    """

    def __init__(self, size=300):
        self.size = size

    def __call__(self, image, boxes=None, labels=None):
        target = (self.size, self.size)
        resized = cv2.resize(image, target)
        return resized, boxes, labels
class RandomSaturation(object):
    """With probability 1/2, scale channel 1 of the image by a random factor.

    The factor is drawn uniformly from ``[lower, upper)``. The image is
    modified in place, so it must have a float dtype.
    # NOTE(review): channel 1 is presumably saturation of an HSV image — confirm
    # against the pipeline that calls this.

    Args:
        lower: smallest scale factor; must be non-negative.
        upper: largest scale factor; must be >= ``lower``.

    Raises:
        AssertionError: if the bounds are inconsistent.
    """

    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        # The messages used to say "contrast" (copied from RandomContrast).
        assert self.upper >= self.lower, "saturation upper must be >= lower."
        assert self.lower >= 0, "saturation lower must be non-negative."

    def __call__(self, image, boxes=None, labels=None):
        # randint(2) yields 0 or 1, i.e. a coin flip.
        if random.randint(2):
            image[:, :, 1] *= random.uniform(self.lower, self.upper)
        return image, boxes, labels
class RandomHue(object):
    """With probability 1/2, shift channel 0 by a random offset, wrapping at 360.

    The offset is drawn uniformly from ``[-delta, delta)``. Values that end
    up above 360 have 360 subtracted; values below 0 have 360 added. The
    image is modified in place.

    Args:
        delta: maximum absolute shift; must lie in [0, 360].
    """

    def __init__(self, delta=18.0):
        assert delta >= 0.0 and delta <= 360.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        if not random.randint(2):
            return image, boxes, labels
        shift = random.uniform(-self.delta, self.delta)
        # hue is a view, so the updates below write through to image.
        hue = image[:, :, 0]
        hue += shift
        hue[hue > 360.0] -= 360.0
        hue[hue < 0.0] += 360.0
        return image, boxes, labels
class RandomLightingNoise(object):
    """With probability 1/2, reorder the image channels by a random permutation.

    ``perms`` lists all six orderings of the indices (0, 1, 2); one of them is
    picked uniformly and applied through ``SwapChannels``.
    """

    def __init__(self):
        self.perms = (
            (0, 1, 2), (0, 2, 1),
            (1, 0, 2), (1, 2, 0),
            (2, 0, 1), (2, 1, 0),
        )

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            pick = random.randint(len(self.perms))
            reorder = SwapChannels(self.perms[pick])
            image = reorder(image)
        return image, boxes, labels
#变换颜色空间
class ConvertColor(object):
    """Convert the image between the BGR and HSV colour spaces with OpenCV.

    Only BGR -> HSV and HSV -> BGR are supported.

    Args:
        current: colour space the image is currently in (default 'BGR').
        transform: colour space to convert to (default 'HSV').

    Raises:
        NotImplementedError: when called with any other (current, transform)
            combination.
    """

    def __init__(self, current='BGR', transform='HSV'):
        self.transform = transform
        self.current = current

    def __call__(self, image, boxes=None, labels=None):
        route = (self.current, self.transform)
        if route == ('BGR', 'HSV'):
            code = cv2.COLOR_BGR2HSV
        elif route == ('HSV', 'BGR'):
            code = cv2.COLOR_HSV2BGR
        else:
            raise NotImplementedError
        converted = cv2.cvtColor(image, code)
        return converted, boxes, labels
#随机改变对比度
class RandomContrast(object):
    """With probability 1/2, multiply every pixel by a random factor.

    The factor is drawn uniformly from ``[lower, upper)``. The image is
    modified in place, so it is expected to have a float dtype.

    Args:
        lower: smallest factor; must be non-negative.
        upper: largest factor; must be >= ``lower``.
    """

    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "contrast upper must be >= lower."
        assert self.lower >= 0, "contrast lower must be non-negative."

    def __call__(self, image, boxes=None, labels=None):
        # randint(2) yields 0 or 1; 0 means "leave the image alone".
        if not random.randint(2):
            return image, boxes, labels
        factor = random.uniform(self.lower, self.upper)
        image *= factor
        return image, boxes, labels
#随机改变亮度
#在原有图片像素上加一个实数(实数的范围在【-32,32】),其中:random.randint(2):在0和1之间随机产生一个数,random.uniform(x, y) :将随机生成一个实数,它在 [x,y] 范围
class RandomBrightness(object):
    """With probability 1/2, add one random offset to every pixel.

    The offset is drawn uniformly from ``[-delta, delta)`` and added in place.

    Args:
        delta: maximum absolute offset; must lie in [0, 255].
    """

    def __init__(self, delta=32):
        assert delta >= 0.0
        assert delta <= 255.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        # randint(2) yields 0 or 1; 0 means "leave the image alone".
        if not random.randint(2):
            return image, boxes, labels
        offset = random.uniform(-self.delta, self.delta)
        image += offset
        return image, boxes, labels
class ToCV2Image(object):
    """Turn a torch tensor into a float32 NumPy array with axes (1, 2, 0).

    Boxes and labels are returned unchanged.
    """

    def __call__(self, tensor, boxes=None, labels=None):
        array = tensor.cpu().numpy().astype(np.float32)
        # Move axis 0 to the end, e.g. (C, H, W) -> (H, W, C).
        image = array.transpose((1, 2, 0))
        return image, boxes, labels
class ToTensor(object):
    """Turn a NumPy image into a float32 torch tensor with axes (2, 0, 1).

    Boxes and labels are returned unchanged.
    """

    def __call__(self, cvimage, boxes=None, labels=None):
        as_float = cvimage.astype(np.float32)
        # Move the last axis to the front, e.g. (H, W, C) -> (C, H, W).
        tensor = torch.from_numpy(as_float).permute(2, 0, 1)
        return tensor, boxes, labels
#随机裁剪,在图像上随机剪裁矩形区域,裁剪区域一定要包含bbox的中心点,将原始图bbox转换到剪裁区域的bbox
class RandomSampleCrop(object):
    """Randomly crop the image and keep the boxes whose centre lies in the crop.

    Each call picks one sampling mode at random: return the input untouched,
    require a minimum Jaccard overlap (0.1 / 0.3 / 0.7 / 0.9) between the
    crop and at least one box, or crop without any overlap constraint.
    Up to 50 candidate crops are tried per mode before a new mode is drawn.

    Arguments:
        image: array whose shape unpacks as (height, width, channels).
        boxes: array of boxes in point form (x_min, y_min, x_max, y_max),
            in the same pixel coordinates as the image.
        labels: per-box labels, indexable with a boolean mask.

    Return:
        (image, boxes, labels)
            image: the cropped image (or the original one).
            boxes: kept boxes, clipped to the crop and shifted so the crop's
                top-left corner is the origin.
            labels: labels of the kept boxes.
    """

    def __init__(self):
        # Each entry is None (keep the whole image) or (min_iou, max_iou),
        # where a None bound means "unbounded".
        self.sample_options = (
            # use the entire original input image
            None,
            # sample a patch with minimum jaccard overlap .1, .3, .7, .9
            (0.1, None),
            (0.3, None),
            (0.7, None),
            (0.9, None),
            # randomly sample a patch
            (None, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        height, width, _ = image.shape
        while True:
            # Pick by index: the options mix None with tuples, and
            # random.choice would first have to convert that ragged sequence
            # into an array, which current NumPy refuses to do.
            mode = self.sample_options[random.randint(len(self.sample_options))]
            if mode is None:
                return image, boxes, labels

            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')

            # At most 50 trials for the chosen mode.
            for _attempt in range(50):
                # Crop size: between 30% and 100% of each image dimension.
                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)

                # Aspect ratio must stay within [0.5, 2].
                if h / w < 0.5 or h / w > 2:
                    continue

                # Top-left corner such that the crop stays inside the image.
                # Both bounds are passed explicitly: a single argument would
                # be taken as `low` with `high` left at its default of 1.0.
                left = random.uniform(0, width - w)
                top = random.uniform(0, height - h)

                # Crop rectangle as [x_min, y_min, x_max, y_max].
                rect = np.array([int(left), int(top),
                                 int(left + w), int(top + h)])

                # Jaccard overlap between every ground-truth box and the crop.
                overlap = jaccard_numpy(boxes, rect)

                # Reject the crop when no box reaches min_iou, or when every
                # box exceeds max_iou. The previous test combined the two
                # bounds with `and`; since max_iou is always inf here it
                # could never reject anything.
                if overlap.max() < min_iou or overlap.min() > max_iou:
                    continue

                # Cut the crop out of the image.
                cropped = image[rect[1]:rect[3], rect[0]:rect[2], :]

                # Keep only boxes whose centre is strictly inside the crop.
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
                after_min = (rect[0] < centers[:, 0]) & (rect[1] < centers[:, 1])
                before_max = (rect[2] > centers[:, 0]) & (rect[3] > centers[:, 1])
                mask = after_min & before_max

                # No box centre inside the crop: try again.
                if not mask.any():
                    continue

                current_boxes = boxes[mask, :].copy()
                current_labels = labels[mask]

                # Clip the kept boxes to the crop ...
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
                                                  rect[:2])
                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
                                                  rect[2:])
                # ... then express them relative to the crop's top-left corner.
                current_boxes[:, :2] -= rect[:2]
                current_boxes[:, 2:] -= rect[:2]

                return cropped, current_boxes, current_labels
#填充
class Expand(object):
    """With probability 1/2, paste the image onto a larger mean-filled canvas.

    The canvas is ``ratio`` times larger in both dimensions (``ratio`` drawn
    uniformly from [1, 4)), the image is placed at a random offset inside it,
    and a copy of the boxes is shifted by that offset.

    Args:
        mean: fill value broadcast over the canvas.
    """

    def __init__(self, mean):
        self.mean = mean

    def __call__(self, image, boxes, labels):
        # randint(2) yields 0 or 1; on 1 the sample is returned untouched.
        if random.randint(2):
            return image, boxes, labels

        height, width, depth = image.shape
        ratio = random.uniform(1, 4)
        left = random.uniform(0, width * ratio - width)
        top = random.uniform(0, height * ratio - height)

        canvas_shape = (int(height * ratio), int(width * ratio), depth)
        canvas = np.zeros(canvas_shape, dtype=image.dtype)
        canvas[:, :, :] = self.mean
        canvas[int(top):int(top + height),
               int(left):int(left + width)] = image

        # Shift a copy so the caller's boxes stay untouched.
        offset = (int(left), int(top))
        moved = boxes.copy()
        moved[:, :2] += offset
        moved[:, 2:] += offset
        return canvas, moved, labels
#随机翻转
class RandomMirror(object):
    """With probability 1/2, flip the image along axis 1 and mirror the boxes.

    When flipping, a copy of the boxes is made and its columns 0 and 2
    become ``width - old column 2`` and ``width - old column 0``.
    """

    def __call__(self, image, boxes, classes):
        _rows, cols, _channels = image.shape
        if not random.randint(2):
            return image, boxes, classes
        flipped = image[:, ::-1]
        mirrored = boxes.copy()
        # boxes[:, 2::-2] selects columns (2, 0), so the two x columns swap
        # roles as they are reflected.
        mirrored[:, 0::2] = cols - mirrored[:, 2::-2]
        return flipped, mirrored, classes
#交换通道
class SwapChannels(object):
    """Reorder the last axis of a 3-D image by a fixed index sequence.

    Args:
        swaps: index sequence used for the third axis, e.g. (2, 1, 0).
    """

    def __init__(self, swaps):
        self.swaps = swaps

    def __call__(self, image):
        """Return ``image`` with its third axis indexed by ``self.swaps``.

        Args:
            image: array-like supporting ``image[:, :, indices]``.
        Return:
            the image with its channels reordered according to ``swaps``.
        """
        reordered = image[:, :, self.swaps]
        return reordered
#Detection.py
import torch
from torch.autograd import Function
from ..box_utils import decode, nms
from data import voc as cfg
class Detect(Function):
    """Final SSD layer at test time.

    Decodes the location predictions into boxes and, per image and per class,
    keeps the boxes whose confidence exceeds ``conf_thresh``, runs
    non-maximum suppression on them and stores the surviving detections.
    """

    def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh):
        self.num_classes = num_classes
        self.background_label = bkg_label
        self.top_k = top_k
        # Parameters used by nms.
        self.nms_thresh = nms_thresh
        if nms_thresh <= 0:
            raise ValueError('nms_threshold must be non negative.')
        self.conf_thresh = conf_thresh
        self.variance = cfg['variance']

    def forward(self, loc_data, conf_data, prior_data):
        """Build the detection tensor for a batch.

        Args:
            loc_data: (tensor) location predictions; its first dimension is
                the batch size and ``loc_data[i]`` is handed to ``decode``
                (presumably [batch, num_priors, 4] — confirm with caller).
            conf_data: (tensor) class confidences, viewable as
                [batch, num_priors, num_classes].
            prior_data: (tensor) prior boxes; its first dimension is used as
                num_priors.

        Returns:
            Tensor of shape [batch, num_classes, top_k, 5]; each filled row
            is (score, 4 box coordinates). Unfilled rows stay zero.
        """
        batch_size = loc_data.size(0)
        prior_count = prior_data.size(0)
        output = torch.zeros(batch_size, self.num_classes, self.top_k, 5)
        # Reorder to [batch, num_classes, num_priors] so one class is one row.
        conf_preds = conf_data.view(
            batch_size, prior_count, self.num_classes).transpose(2, 1)

        for i in range(batch_size):
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            class_scores = conf_preds[i].clone()
            # Class index 0 is skipped; the loop starts at 1.
            for cl in range(1, self.num_classes):
                confident = class_scores[cl].gt(self.conf_thresh)
                scores = class_scores[cl][confident]
                if scores.size(0) == 0:
                    continue
                box_mask = confident.unsqueeze(1).expand_as(decoded_boxes)
                boxes = decoded_boxes[box_mask].view(-1, 4)
                # Indices of the highest-scoring, non-overlapping boxes.
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                kept = ids[:count]
                detections = torch.cat(
                    (scores[kept].unsqueeze(1), boxes[kept]), 1)
                output[i, cl, :count] = detections

        flt = output.contiguous().view(batch_size, -1, 5)
        _, idx = flt[:, :, 0].sort(1, descending=True)
        _, rank = idx.sort(1)
        # NOTE(review): indexing with a boolean mask yields a new tensor, so
        # this fill_ does not appear to modify `output`; the comparison also
        # looks inverted for a "keep the top_k" filter — confirm the intent.
        flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
        return output
定义:
两个BOX区域的交集比上并集。
在维度一致的前提下:
1.计算两个box左上角点坐标的最大值和右下角坐标的最小值
2.计算交集面积
3.把交集面积除以对应的并集面积
def jaccard(box_a, box_b):
    """Return the pairwise Jaccard overlap (intersection over union).

    Args:
        box_a: tensor indexed as ``box_a[:, k]`` for k in 0..3; presumably
            [A, 4] in (x_min, y_min, x_max, y_max) form.
        box_b: tensor with the same layout; presumably [B, 4].

    Returns:
        Tensor shaped like ``intersect(box_a, box_b)`` (commented as [A, B]
        in the original) holding ``intersection / union`` for every pair.
    """
    inter = intersect(box_a, box_b)
    size_a = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])
    size_b = (box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1])
    # Broadcast the per-box areas to the shape of the intersection matrix.
    area_a = size_a.unsqueeze(1).expand_as(inter)
    area_b = size_b.unsqueeze(0).expand_as(inter)
    union = area_a + area_b - inter
    return inter / union
**编码:**得到与default box匹配的真实框(ground truth)相对于default box的偏移量,作为回归目标(网络的预测值 l 要去拟合它)
def encode(matched, priors, variances):
    """Encode matched boxes as offsets relative to their prior boxes.

    Args:
        matched: tensor of boxes in point form (x_min, y_min, x_max, y_max),
            one row per prior.
        priors: tensor of prior boxes in centre form (cx, cy, w, h).
        variances: pair of scale factors; ``variances[0]`` scales the centre
            offsets and ``variances[1]`` the log size ratios.

    Returns:
        Tensor with four columns per row: centre offsets followed by log
        width/height ratios (the regression target).
    """
    # Centre of each matched box minus the prior centre ...
    centre_delta = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]
    # ... expressed as a fraction of the prior size, scaled by the variance.
    centre_delta = centre_delta / (variances[0] * priors[:, 2:])
    # Width/height of the matched box relative to the prior, in log space.
    size_ratio = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
    log_size = torch.log(size_ratio) / variances[1]
    return torch.cat([centre_delta, log_size], 1)
**解码:**从预测值 l 和default box中还原出边界框的实际坐标(xmin, ymin, xmax, ymax)
def decode(loc, priors, variances):
    """Turn predicted offsets back into boxes, undoing the offset encoding.

    Args:
        loc: tensor of predicted offsets, four columns per prior.
        priors: tensor of prior boxes in centre form (cx, cy, w, h).
        variances: pair of scale factors; ``variances[0]`` applies to the
            centre offsets and ``variances[1]`` to the log size ratios.

    Returns:
        Tensor of boxes in point form (x_min, y_min, x_max, y_max).
    """
    centres = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
    sizes = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
    boxes = torch.cat((centres, sizes), 1)
    # Centre form -> point form: min = centre - size / 2, then max = min + size.
    boxes[:, :2] -= boxes[:, 2:] / 2
    boxes[:, 2:] += boxes[:, :2]
    return boxes