I want to learn object detection. Many YOLOv3 implementations are built on Darknet, which is not very convenient, so I am turning to SSD, which is just as lightweight.
https://zhuanlan.zhihu.com/p/79854543
https://zhuanlan.zhihu.com/p/79933177
https://zhuanlan.zhihu.com/p/66332452
This experiment uses VOC07+12: 20 classes, plus background for 21 in total.
The original SSD300 uses six scales, starting at 38*38 and halving step by step down to 1*1; the MobileNet version starts at 19*19 instead.
def forward(self, x):
    """Applies network layers and ops on input image(s) x.
    Args:
        x: input image or batch of images. Shape: [batch,3,300,300].
    Return:
        Depending on phase:
        test:
            Variable(tensor) of output class label predictions,
            confidence score, and corresponding location predictions for
            each object detected. Shape: [batch,topk,7]
        train:
            list of concat outputs from:
                1: confidence layers, Shape: [batch*num_priors,num_classes]
                2: localization layers, Shape: [batch,num_priors*4]
                3: priorbox layers, Shape: [2,num_priors*4]
    """
    sources = list()
    loc = list()
    conf = list()

    # apply the MobileNet stem (replaces "VGG up to conv4_3 relu" in the original SSD)
    x = self.mobilenet.conv1(x)        # torch.Size([32, 16, 150, 150])
    x = self.mobilenet.bn1(x)          # torch.Size([32, 16, 150, 150])
    x = self.mobilenet.activation(x)   # torch.Size([32, 16, 150, 150])
    for block in self.mobilenet.bottlenecks[:5]:
        x = block(x)
    '''
    Bottleneck output shapes (full list):
    torch.Size([32, 16, 150, 150])  B_0
    torch.Size([32, 24, 75, 75])
    torch.Size([32, 32, 38, 38])
    torch.Size([32, 64, 19, 19])
    torch.Size([32, 96, 19, 19])
    torch.Size([32, 160, 10, 10])
    torch.Size([32, 320, 10, 10])
    '''
    # s = self.L2Norm(x)  (the original SSD L2-normalizes conv4_3 here)
    sources.append(x)  # torch.Size([32, 96, 19, 19])

    # apply the remaining bottlenecks and conv_last (replaces "VGG up to fc7")
    for block in self.mobilenet.bottlenecks[5:]:
        x = block(x)
    x = self.mobilenet.conv_last(x)
    x = self.mobilenet.bn_last(x)
    x = self.mobilenet.activation(x)
    sources.append(x)  # torch.Size([32, 1280, 10, 10])

    # apply extra layers and cache source layer outputs
    for k, v in enumerate(self.extras):
        # x = F.relu(v(x), inplace=True)
        x = v(x)
        if k % 2 == 1:
            sources.append(x)  # extras outputs: [512,5,5], [256,3,3], [256,2,2], [128,1,1]

    # apply multibox head to source layers
    for (x, l, c) in zip(sources, self.loc, self.conf):
        loc.append(l(x).permute(0, 2, 3, 1).contiguous())   # [B,4*num_box,H,W] --> [B,H,W,4*num_box]
        conf.append(c(x).permute(0, 2, 3, 1).contiguous())

    loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
    '''
    [B, H1*W1*16 + H2*W2*24 + ...]: every box of the batch, one row per image.
    The six loc outputs before flattening:
    torch.Size([32, 16, 19, 19])
    torch.Size([32, 24, 10, 10])
    torch.Size([32, 24, 5, 5])
    torch.Size([32, 24, 3, 3])
    torch.Size([32, 24, 2, 2])
    torch.Size([32, 24, 1, 1])
    '''
    conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)

    if self.phase == "test":
        output = self.detect(
            loc.view(loc.size(0), -1, 4),                   # loc preds
            self.softmax(conf.view(conf.size(0), -1,
                         self.num_classes)),                # conf preds
            self.priors.type(type(x.data))                  # default boxes
        )
    else:
        output = (
            loc.view(loc.size(0), -1, 4),                   # [B,n,4], n = all priors of one image
            conf.view(conf.size(0), -1, self.num_classes),
            self.priors  # anchor order: 1_min, 1_max, 2_max, 2_min; [cx,cy,w,h] laid out column-then-row per feature map; shape [n,4]
        )
    return output
In short, my own summary:
Six feature maps are taken from the network, 96*19*19, 1280*10*10, 512*5*5, 256*3*3, 256*2*2 and 128*1*1, for the subsequent steps.
The original SSD uses 38*38, 19*19, 10*10, 5*5, 3*3, 1*1, so the grids differ.
The loc and conf heads extract features from these maps, each producing six feature maps used to compute the loss; the anchor count per location on the six layers is set to 4, 6, 6, 6, 6, 6 (a construction sketch follows the example shapes below).
So loc and conf differ only in channel count: 4*num_anchor versus 21*num_anchor.
For example, the six loc outputs:
torch.Size([32, 16, 19, 19])
torch.Size([32, 24, 10, 10])
torch.Size([32, 24, 5, 5])
torch.Size([32, 24, 3, 3])
torch.Size([32, 24, 2, 2])
torch.Size([32, 24, 1, 1])
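As a sanity check on those numbers, here is a minimal sketch (my own, not from the repo) of how the heads and total prior count fall out of the shapes above; source_channels and anchor_counts are read off the listed sizes:

import torch.nn as nn

num_classes = 21
source_channels = [96, 1280, 512, 256, 256, 128]   # from the six source maps above
anchor_counts = [4, 6, 6, 6, 6, 6]
loc_layers = nn.ModuleList(nn.Conv2d(c, n * 4, kernel_size=3, padding=1)
                           for c, n in zip(source_channels, anchor_counts))
conf_layers = nn.ModuleList(nn.Conv2d(c, n * num_classes, kernel_size=3, padding=1)
                            for c, n in zip(source_channels, anchor_counts))
grids = [19, 10, 5, 3, 2, 1]
print(sum(g * g * n for g, n in zip(grids, anchor_counts)))   # 2278 priors in total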
Then loc and conf are flattened to [batch_size, x].
train: returns output = (
    loc.view(loc.size(0), -1, 4),  # [B,n,4], n = every box of one image across the six scales
    conf.view(conf.size(0), -1, self.num_classes),
    self.priors  # anchor order: 1_min, 1_max, 2_max, 2_min (3_max, 3_min exist only on the last five scales);
                 # [cx,cy,w,h] laid out column-then-row over each feature map; shape [n,4]
                 # cx,cy are the grid-cell centers; cx, cy, w, h are all values in 0-1
)
test: output = self.detect(
    loc.view(loc.size(0), -1, 4),  # loc preds
    self.softmax(conf.view(conf.size(0), -1,
                 self.num_classes)),  # conf preds; with NLLLoss you must apply softmax yourself
    self.priors.type(type(x.data))  # default boxes
)
Reading the XML annotation files:
def __init__(self, root,
             image_sets=[('2007', 'trainval'), ('2012', 'trainval')],
             transform=None, target_transform=VOCAnnotationTransform(),
             dataset_name='VOC0712'):
    self.root = root
    self.image_set = image_sets
    self.transform = transform
    self.target_transform = target_transform
    self.name = dataset_name
    self._annopath = osp.join('%s', 'Annotations', '%s.xml')
    self._imgpath = osp.join('%s', 'JPEGImages', '%s.jpg')
    self.ids = list()
    for (year, name) in image_sets:
        rootpath = osp.join(self.root, 'VOC' + year)
        for line in open(osp.join(rootpath, 'ImageSets', 'Main', name + '.txt')):
            self.ids.append((rootpath, line.strip()))  # 07+12: [('.../VOC2007', '000001'), ...]

def __getitem__(self, index):
    im, gt, h, w = self.pull_item(index)
    return im, gt

def __len__(self):
    return len(self.ids)

def pull_item(self, index):
    img_id = self.ids[index]
    target = ET.parse(self._annopath % img_id).getroot()
    img = cv2.imread(self._imgpath % img_id)
    height, width, channels = img.shape
    if self.target_transform is not None:
        target = self.target_transform(target, width, height)
    if self.transform is not None:
        target = np.array(target)
        img, boxes, labels = self.transform(img, target[:, :4], target[:, 4])
        # to rgb
        img = img[:, :, (2, 1, 0)]
        target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
    return torch.from_numpy(img).permute(2, 0, 1), target, height, width
    # returns torch.from_numpy(img) as C*H*W, target as [[xmin, ymin, xmax, ymax, label_ind], ...]
    # with coordinates in 0-1 relative to the image's own w,h, plus height and width
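Each image carries a different number of ground-truth boxes, so the default DataLoader collate cannot stack the targets. A minimal collate sketch in the style of the ssd.pytorch reference (which calls it detection_collate); the DataLoader wiring is illustrative:

def detection_collate(batch):
    # Stack images into [B,3,300,300]; keep targets as a list of
    # [num_objs,5] tensors because num_objs varies per image.
    imgs, targets = [], []
    for img, target in batch:
        imgs.append(img)
        targets.append(torch.FloatTensor(target))
    return torch.stack(imgs, 0), targets

# loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True,
#                                      collate_fn=detection_collate)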
A custom MultiBoxLoss(nn.Module) implements the loss computation in its forward:
class MultiBoxLoss(nn.Module):
    """SSD Weighted Loss Function
    Compute Targets:
        1) Produce Confidence Target Indices by matching ground truth boxes
           with (default) 'priorboxes' that have jaccard index > threshold parameter
           (default threshold: 0.5).
        2) Produce localization target by 'encoding' variance into offsets of ground
           truth boxes and their matched 'priorboxes'.
        3) Hard negative mining to filter the excessive number of negative examples
           that comes with using a large number of default bounding boxes.
           (default negative:positive ratio 3:1)
    Objective Loss:
        L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
        weighted by α which is set to 1 by cross val.
        Args:
            c: class confidences,
            l: predicted boxes,
            g: ground truth boxes
            N: number of matched default boxes
        See: https://arxiv.org/pdf/1512.02325.pdf for more details.
    Example:
        criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                                 False, args.cuda)
    """

    def __init__(self, num_classes, overlap_thresh, prior_for_matching,
                 bkg_label, neg_mining, neg_pos, neg_overlap, encode_target,
                 use_gpu=True):
        super(MultiBoxLoss, self).__init__()
        self.use_gpu = use_gpu
        self.num_classes = num_classes                      # 21
        self.threshold = overlap_thresh                     # 0.5
        self.background_label = bkg_label                   # 0
        self.encode_target = encode_target                  # False
        self.use_prior_for_matching = prior_for_matching    # True
        self.do_neg_mining = neg_mining                     # True
        self.negpos_ratio = neg_pos                         # 3, i.e. neg:pos = 3:1
        self.neg_overlap = neg_overlap                      # 0.5
        self.variance = [0.1, 0.2]

    def forward(self, predictions, targets):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
                and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4), cx cy w h in 0-1
            targets (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
        """
        loc_data, conf_data, priors = predictions
        num = loc_data.size(0)                      # batch size
        priors = priors[:loc_data.size(1), :]       # torch.size(num_priors,4)
        num_priors = (priors.size(0))
        num_classes = self.num_classes

        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)  # long labels, convenient for one-hot/CE
        for idx in range(num):                      # image by image
            target = targets[idx]                   # [xmin,ymin,xmax,ymax,label], 0-1
            truths = target[:, :-1].data            # GT, shape num_objects*4 (objects != priors)
            labels = target[:, -1].data             # (num_obj,)
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
            # loc_t --> [B,num_priors,4]: each prior gets its ideal offset to its matched GT.
            # conf_t --> [B,num_priors]: top class label (out of 21) for each prior.
        if self.use_gpu:
            loc_t = loc_t.cuda()                    # all entries were filled in-place
            conf_t = conf_t.cuda()                  # B*num_priors
        # wrap targets
        loc_t = Variable(loc_t, requires_grad=False)    # [B,num_priors,4]
        conf_t = Variable(conf_t, requires_grad=False)

        pos = conf_t > 0                            # boolean mask, B*num_priors
        num_pos = pos.sum(dim=1, keepdim=True)      # non-background priors per image, B*1

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)  # (batch_size,num_priors,4) mask
        loc_p = loc_data[pos_idx].view(-1, 4)       # flatten then view, n*4; only priors holding an object
        loc_t = loc_t[pos_idx].view(-1, 4)          # positive-sample GT offsets; very few per batch
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')  # smooth-L1, returns a scalar

        # Compute max conf across batch for hard negative mining; shape [B*num_priors,1]
        batch_conf = conf_data.view(-1, self.num_classes)  # (batch_size*num_priors,num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
        # --> (B*num_priors,1): per-prior cross-entropy; gather picks the conf of the target class.

        # Hard Negative Mining
        loss_c = loss_c.view(num, -1)
        loss_c[pos] = 0                                 # B*num_priors; only background priors matter here
        _, loss_idx = loss_c.sort(1, descending=True)   # per image, descending: hardest backgrounds first
        _, idx_rank = loss_idx.sort(1)                  # rank of each prior within its image
        num_pos = pos.long().sum(1, keepdim=True)       # non-background count per image, B*1
        num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)  # keep up to 3x negatives, B*1
        neg = idx_rank < num_neg.expand_as(idx_rank)    # B*num_priors: 1 for the few selected backgrounds

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)  # B*num_priors --> B,num_priors,num_classes (1 = object)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)  # B*num_priors*21 (1 = selected background)
        conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes)  # selected boxes enter the conf loss, n*21
        targets_weighted = conf_t[(pos+neg).gt(0)]       # matching targets, (n,), values 0-20
        if cfg.USE_FL:
            alpha = np.full((num_classes, 1), 0.75)      # 21*1
            alpha[0] = 0.25                              # down-weight the background class
            alpha = torch.Tensor(alpha)
            compute_c_loss = focal_loss.FocalLoss(alpha=alpha, gamma=2, class_num=num_classes, size_average=False)
            loss_c = compute_c_loss(conf_p, targets_weighted)  # (n,21), (n,)
        else:
            loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        N = num_pos.data.sum()                      # number of positives, far below num_priors
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
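A rough training step, assuming net, criterion (the MultiBoxLoss above) and optimizer are already constructed; the variable names are illustrative:

images, targets = next(iter(loader))   # from a DataLoader using detection_collate
out = net(images)                      # (loc, conf, priors) in train phase
optimizer.zero_grad()
loss_l, loss_c = criterion(out, targets)
loss = loss_l + loss_c                 # alpha = 1, as in the paper
loss.backward()
optimizer.step()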
# -*- coding: utf-8 -*-
import torch
def point_form(boxes):
    """ Convert prior_boxes to (xmin, ymin, xmax, ymax)
    representation for comparison to point form ground truth data.
    Args:
        boxes: (tensor) center-size default boxes from priorbox layers.
    Return:
        boxes: (tensor) Converted (xmin, ymin, xmax, ymax) form of boxes.
    """
    return torch.cat((boxes[:, :2] - boxes[:, 2:]/2,     # xmin, ymin
                      boxes[:, :2] + boxes[:, 2:]/2), 1) # xmax, ymax

def center_size(boxes):
    """ Convert point_form boxes to (cx, cy, w, h)
    representation for comparison to center-size form ground truth data.
    Args:
        boxes: (tensor) point_form boxes
    Return:
        boxes: (tensor) Converted (cx, cy, w, h) form of boxes.
    """
    return torch.cat(((boxes[:, 2:] + boxes[:, :2])/2,   # cx, cy
                      boxes[:, 2:] - boxes[:, :2]), 1)   # w, h
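# Quick sanity check (my own, not from the repo): point_form and center_size
# should invert each other.
_b = torch.tensor([[0.5, 0.5, 0.2, 0.4],
                   [0.3, 0.7, 0.1, 0.1]])   # [cx, cy, w, h]
assert torch.allclose(center_size(point_form(_b)), _b)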
def intersect(box_a, box_b):
    """ We resize both tensors to [A,B,2] without new malloc:
    [A,2] -> [A,1,2] -> [A,B,2]
    [B,2] -> [1,B,2] -> [A,B,2]
    Then we compute the area of intersect between box_a and box_b.
    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4].
        box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
        (tensor) intersection area, Shape: [A,B].
    """
    A = box_a.size(0)
    B = box_b.size(0)
    max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
                       box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
    min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2),
                       box_b[:, :2].unsqueeze(0).expand(A, B, 2))
    inter = torch.clamp((max_xy - min_xy), min=0)
    return inter[:, :, 0] * inter[:, :, 1]

def jaccard(box_a, box_b):
    """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
    is simply the intersection over union of two boxes. Here we operate on
    ground truth boxes and default boxes.
    E.g.:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
    Args:
        box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
        box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
    Return:
        jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
    """
    inter = intersect(box_a, box_b)
    area_a = ((box_a[:, 2]-box_a[:, 0]) *
              (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter)  # [A,B]
    area_b = ((box_b[:, 2]-box_b[:, 0]) *
              (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter)  # [A,B]
    union = area_a + area_b - inter
    return inter / union  # [A,B]
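# Tiny numeric check of the IoU math (boxes in corner form, values made up):
_a = torch.tensor([[0.0, 0.0, 2.0, 2.0]])     # area 4
_c = torch.tensor([[1.0, 1.0, 3.0, 3.0],      # 1x1 overlap with _a
                   [4.0, 4.0, 5.0, 5.0]])     # disjoint
print(jaccard(_a, _c))  # tensor([[0.1429, 0.0000]]): 1 / (4 + 4 - 1) = 1/7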
def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx):
    """Match each prior box with the ground truth box of the highest jaccard
    overlap, encode the bounding boxes, then return the matched indices
    corresponding to both confidence and location preds.
    Args:
        threshold: (float) The overlap threshold used when matching boxes (0.5).
        truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
        priors: (tensor) Prior boxes from priorbox layers, Shape: [num_priors, 4].
        variances: (tensor) Variances corresponding to each prior coord, [0.1, 0.2].
        labels: (tensor) All the class labels for the image, Shape: [num_obj].
        loc_t: (tensor) Tensor to be filled w/ encoded location targets, (num_priors, 4).
        conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds, (num_priors,).
        idx: (int) current batch index
    Return:
        The matched indices corresponding to 1) location and 2) confidence preds.
    """
    # jaccard index
    overlaps = jaccard(            # returns IoU, [num_obj, num_priors]
        truths,
        point_form(priors)         # priors come in as cx,cy,w,h in 0-1
    )
    # (Bipartite Matching)
    # best prior for each ground truth
    best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)  # [num_obj,1], [num_obj,1]
    # best ground truth for each prior
    best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)  # [1,num_priors], [1,num_priors]
    best_truth_idx.squeeze_(0)     # [num_priors]
    best_truth_overlap.squeeze_(0)
    best_prior_idx.squeeze_(1)
    best_prior_overlap.squeeze_(1)
    best_truth_overlap.index_fill_(0, best_prior_idx, 2)  # ensure best prior: GT-designated priors get overlap 2
    # TODO refactor: index best_prior_idx with long tensor
    # ensure every gt matches with its prior of max overlap
    for j in range(best_prior_idx.size(0)):
        best_truth_idx[best_prior_idx[j]] = j   # reassign the GT-designated prior's matched index
    matches = truths[best_truth_idx]            # Shape: [num_priors,4], every prior's matched GT
    conf = labels[best_truth_idx] + 1           # Shape: [num_priors]; 0 is background, so 20 classes become 21
    conf[best_truth_overlap < threshold] = 0    # label as background; conf is [num_priors]
    loc = encode(matches, priors, variances)    # [num_priors,4]
    loc_t[idx] = loc    # encoded offsets to learn: the prior-to-GT offset, the network's ideal output; loc_t[b,num_priors,4]
    conf_t[idx] = conf  # top class label for each prior
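# Toy run of match() for one image with a single GT box (values made up):
# the GT-designated prior gets conf label 7+1=8, the others fall below the
# 0.5 threshold and become background (0).
_priors = torch.tensor([[0.25, 0.25, 0.3, 0.3],   # sits right on the GT
                        [0.75, 0.75, 0.3, 0.3],   # disjoint
                        [0.50, 0.50, 1.0, 1.0]])  # huge, low IoU
_truths = torch.tensor([[0.1, 0.1, 0.4, 0.4]])    # corner form
_labels = torch.tensor([7.])                      # VOC class index (0-19)
_loc_t = torch.Tensor(1, 3, 4)
_conf_t = torch.LongTensor(1, 3)
match(0.5, _truths, _priors, [0.1, 0.2], _labels, _loc_t, _conf_t, 0)
print(_conf_t)  # tensor([[8, 0, 0]])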
def encode(matched, priors, variances):
    """Encode the variances from the priorbox layers into the ground truth boxes
    we have matched (based on jaccard overlap) with the prior boxes.
    Args:
        matched: (tensor) Coords of ground truth for each prior in point-form
            Shape: [num_priors, 4].
        priors: (tensor) Prior boxes in center-offset form
            Shape: [num_priors, 4].
        variances: (list[float]) Variances of priorboxes, [0.1, 0.2]
    Return:
        encoded boxes (tensor), Shape: [num_priors, 4]
    """
    # dist b/t matched box center and prior's center
    g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2]  # priors are cx,cy,w,h
    # encode variance
    g_cxcy /= (variances[0] * priors[:, 2:])
    # match wh / prior wh
    g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
    g_wh = torch.log(g_wh) / variances[1]
    # return target for smooth_l1_loss
    return torch.cat([g_cxcy, g_wh], 1)  # [num_priors,4]
# Adapted from https://github.com/Hakuyume/chainer-ssd
def decode(loc, priors, variances):
    """Decode locations from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
        loc (tensor): location predictions for loc layers,
            Shape: [num_priors,4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions
    """
    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
    boxes[:, :2] -= boxes[:, 2:] / 2
    boxes[:, 2:] += boxes[:, :2]
    return boxes  # ideally this reproduces the matched GT box; in practice it comes close
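# encode/decode should be inverses; quick check with a made-up prior and GT:
_matched = torch.tensor([[0.1, 0.1, 0.4, 0.4]])    # corner-form GT
_prior_c = torch.tensor([[0.3, 0.3, 0.35, 0.35]])  # center-size prior
_off = encode(_matched, _prior_c, [0.1, 0.2])
assert torch.allclose(decode(_off, _prior_c, [0.1, 0.2]), _matched, atol=1e-6)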
def log_sum_exp(x):
    """Utility function for computing log_sum_exp in a numerically stable way.
    Used to compute the unaveraged confidence loss across all examples in a batch.
    Args:
        x (Variable(tensor)): conf_preds from conf layers, (b*num_priors, 21)
    """
    x_max = x.data.max()
    return torch.log(torch.sum(torch.exp(x-x_max), 1, keepdim=True)) + x_max
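# The loss_c formula in MultiBoxLoss is exactly per-sample cross-entropy;
# check against F.cross_entropy (my own sanity test, not from the repo):
import torch.nn.functional as F
_x = torch.randn(5, 21)            # fake (b*num_priors, 21) logits
_t = torch.randint(0, 21, (5,))
_manual = (log_sum_exp(_x) - _x.gather(1, _t.view(-1, 1))).squeeze(1)
assert torch.allclose(_manual, F.cross_entropy(_x, _t, reduction='none'), atol=1e-5)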
def nms(boxes, scores, overlap=0.5, top_k=200):
    """Apply non-maximum suppression at test time to avoid detecting too many
    overlapping bounding boxes for a given object.
    Args:
        boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
        scores: (tensor) The class predscores for the img, Shape: [num_priors].
        overlap: (float) The overlap thresh for suppressing unnecessary boxes.
        top_k: (int) The Maximum number of box preds to consider.
    Return:
        The indices of the kept boxes with respect to num_priors.
    """
    keep = scores.new(scores.size(0)).zero_().long()
    if boxes.numel() == 0:
        return keep
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)
    v, idx = scores.sort(0)  # sort in ascending order
    idx = idx[-top_k:]       # indices of the top-k largest vals; start filtering from the 200 highest-scoring boxes
    xx1 = boxes.new()
    yy1 = boxes.new()
    xx2 = boxes.new()
    yy2 = boxes.new()
    w = boxes.new()
    h = boxes.new()
    count = 0
    while idx.numel() > 0:
        i = idx[-1]          # index of current largest val
        keep[count] = i
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]       # remove kept element from view
        # load bboxes of next highest vals
        torch.index_select(x1, 0, idx, out=xx1)
        torch.index_select(y1, 0, idx, out=yy1)
        torch.index_select(x2, 0, idx, out=xx2)
        torch.index_select(y2, 0, idx, out=yy2)
        # store element-wise max with next highest score
        xx1 = torch.clamp(xx1, min=x1[i])
        yy1 = torch.clamp(yy1, min=y1[i])
        xx2 = torch.clamp(xx2, max=x2[i])
        yy2 = torch.clamp(yy2, max=y2[i])
        w.resize_as_(xx2)
        h.resize_as_(yy2)
        w = xx2 - xx1
        h = yy2 - yy1
        # clamp clipped box sizes to be non-negative
        w = torch.clamp(w, min=0.0)
        h = torch.clamp(h, min=0.0)
        inter = w*h
        # IoU = i / (area(a) + area(b) - i)
        rem_areas = torch.index_select(area, 0, idx)  # load remaining areas
        union = (rem_areas - inter) + area[i]
        IoU = inter/union
        # keep only elements with an IoU <= overlap
        idx = idx[IoU.le(overlap)]
    return keep, count  # indices of the kept boxes, numbered relative to this image's predictions
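# torchvision ships the same operation; the kept indices should agree (my own check):
import torchvision
_boxes = torch.tensor([[0.0, 0.0, 2.0, 2.0],
                       [0.1, 0.1, 2.0, 2.0],     # near-duplicate of box 0
                       [3.0, 3.0, 5.0, 5.0]])
_scores = torch.tensor([0.9, 0.8, 0.7])
_keep, _count = nms(_boxes, _scores, overlap=0.5)
print(_keep[:_count])                             # tensor([0, 2])
print(torchvision.ops.nms(_boxes, _scores, 0.5))  # tensor([0, 2])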
https://zhuanlan.zhihu.com/p/70667071 (this write-up is excellent; I follow it as the main reference)
import numpy as np
def voc_ap(rec, prec, use_07_metric=False):
    """ ap = voc_ap(rec, prec, [use_07_metric])
    Compute VOC AP given precision and recall.
    If use_07_metric is true, uses the
    VOC 07 11 point method (default: False).
    """
    # VOC2007 used 11-point interpolation to compute AP; no longer used today
    if use_07_metric:
        # 11 point metric
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            if np.sum(rec >= t) == 0:
                p = 0
            else:
                p = np.max(prec[rec >= t])
            ap = ap + p / 11.
    else:
        # correct AP calculation
        # first append sentinel values at the end
        mrec = np.concatenate(([0.], rec, [1.]))   # [0., 0.0666, 0.1333, 0.1333, 0.4, 0.4666, 1.]
        mpre = np.concatenate(([0.], prec, [0.]))  # [0., 1., 0.6666, 0.6666, 0.4285, 0.3043, 0.]
        # compute the precision envelope
        # (make precision monotonically non-increasing, right to left)
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])  # [1., 1., 0.6666, 0.6666, 0.4285, 0.3043, 0.]
        # to calculate area under PR curve, look for points
        # where X axis (recall) changes value
        i = np.where(mrec[1:] != mrec[:-1])[0]  # indices where recall changes between consecutive entries
        print(mrec[1:], mrec[:-1])
        print(i)  # [0 1 3 4 5]
        # AP = AP1 + AP2 + AP3 + AP4
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap
rec = np.array([0.0666, 0.1333, 0.1333, 0.4, 0.4666])
prec = np.array([1., 0.6666, 0.6666, 0.4285, 0.3043])
ap = voc_ap(rec, prec)
print(ap)  # output: 0.2456