在GIoU之前,主要是以IoU或者 $\ell_n$ norm 作为损失函数的,而这些损失函数实际上是不够精细的。也就是说,即使观感上的差距很大,损失的值也可能相同。以GIoU论文里举出的(a)图为例:
此时以 $\ell_2$ norm 作为损失函数(也就是两对角点之间的欧氏距离),为了简便起见,固定一个点不动(假设是左下角的点不动),以ground truth的另一个角点为圆心,做一个半径为 $r$ 的圆,那么此时落在圆上的任意一点与固定点组成的bbox(黑色)和ground truth(绿色)之间的 $\ell_2$ norm 损失值都是相同的(如a所示)。在观感上,我们可能会倾向于最右侧的预测结果,但是计算机在使用 $\ell_2$ 损失的时候是无法区分这些情况的。
而 $GIoU = IoU - \frac{C - (A \cup B)}{C}$,其中 $A$、$B$ 分别为预测框和ground truth,$C$ 为同时包含二者的最小外接矩形(式中各项均取面积)。
def getConvexShape(bbox_gt, bbox_pd):
    """Return the smallest axis-aligned box that contains both boxes.

    Both arguments are indexable as ``[x_min, y_min, x_max, y_max]``.

    Args:
        bbox_gt: Ground-truth box.
        bbox_pd: Predicted box.

    Returns:
        A new list ``[x_min, y_min, x_max, y_max]`` built from the
        element-wise minimum of the first two coordinates and the
        element-wise maximum of the last two.
    """
    return [
        min(bbox_gt[0], bbox_pd[0]),
        min(bbox_gt[1], bbox_pd[1]),
        max(bbox_gt[2], bbox_pd[2]),
        max(bbox_gt[3], bbox_pd[3]),
    ]
def getBboxArea(bbox):
    """Return the area of a ``[x_min, y_min, x_max, y_max]`` box.

    Args:
        bbox: Box indexable as ``[x_min, y_min, x_max, y_max]``.

    Returns:
        ``width * height`` when both extents are strictly positive,
        otherwise ``0.0``. This makes degenerate or inverted boxes (for
        example the "intersection" of two disjoint boxes) count as empty.
    """
    width = bbox[2] - bbox[0]
    height = bbox[3] - bbox[1]
    if width > 0 and height > 0:
        return width * height
    return 0.0
def getIntersection(bbox_gt, bbox_pd):
    """Return the candidate intersection box of two boxes.

    Both arguments are indexable as ``[x_min, y_min, x_max, y_max]``.

    Args:
        bbox_gt: Ground-truth box.
        bbox_pd: Predicted box.

    Returns:
        A new list ``[x_min, y_min, x_max, y_max]`` built from the
        element-wise maximum of the first two coordinates and the
        element-wise minimum of the last two. No overlap check is done
        here: for disjoint boxes the result is inverted (a max coordinate
        smaller than the matching min coordinate).
    """
    return [
        max(bbox_gt[0], bbox_pd[0]),
        max(bbox_gt[1], bbox_pd[1]),
        min(bbox_gt[2], bbox_pd[2]),
        min(bbox_gt[3], bbox_pd[3]),
    ]
def getEnclosingBbox(bbox_cvx, bbox_gt):
    """Return the smallest axis-aligned box that contains both boxes.

    Both arguments are indexable as ``[x_min, y_min, x_max, y_max]``.

    Args:
        bbox_cvx: First box.
        bbox_gt: Second box.

    Returns:
        A new list ``[x_min, y_min, x_max, y_max]`` built from the
        element-wise minimum of the first two coordinates and the
        element-wise maximum of the last two.
    """
    return [
        min(bbox_cvx[0], bbox_gt[0]),
        min(bbox_cvx[1], bbox_gt[1]),
        max(bbox_cvx[2], bbox_gt[2]),
        max(bbox_cvx[3], bbox_gt[3]),
    ]
def getGIoU(bbox_gt, bbox_pd):
    """Compute IoU and Generalized IoU (GIoU) for two boxes.

    Both boxes are indexable as ``[x_min, y_min, x_max, y_max]``.

    GIoU = IoU - (C - U) / C, where U is the area of the union of the two
    boxes and C is the area of the smallest box enclosing both of them.

    Args:
        bbox_gt: Ground-truth box.
        bbox_pd: Predicted box.

    Returns:
        A tuple ``(IoU, GIoU)``. When the union area is zero (both boxes
        are degenerate) IoU is reported as ``0.0`` instead of raising
        ``ZeroDivisionError``; when the enclosing area is zero the
        enclosing-box penalty is taken to be zero.
    """
    # Smallest box enclosing both inputs (C in the formula) and its area.
    bbox_cvx = getConvexShape(bbox_gt, bbox_pd)
    area_bbox_cvx = getBboxArea(bbox_cvx)

    # Areas of the two boxes and of their overlap; getBboxArea maps the
    # inverted box produced for disjoint inputs to an area of 0.
    area_bbox_gt = getBboxArea(bbox_gt)
    area_bbox_pd = getBboxArea(bbox_pd)
    area_bbox_inter = getBboxArea(getIntersection(bbox_gt, bbox_pd))

    # Inclusion-exclusion gives the union area.
    area_union = area_bbox_gt + area_bbox_pd - area_bbox_inter

    # Guard the divisions: degenerate inputs would otherwise divide by zero.
    IoU = area_bbox_inter / area_union if area_union > 0 else 0.0
    if area_bbox_cvx > 0:
        GIoU = IoU - (area_bbox_cvx - area_union) / area_bbox_cvx
    else:
        GIoU = IoU
    return IoU, GIoU
# Sample boxes in [x_min, y_min, x_max, y_max] form. Neither prediction
# overlaps the ground truth, so both have an IoU of 0 with it; they differ
# only in size.
bbox_gt = [5, 5, 10, 10]
bbox_pd_1 = [0, 0, 3, 3]
bbox_pd_2 = [0, 0, 1, 1]
$R_{DIoU} = \frac{\rho^2(\bm{b}, \bm{b^{gt}})}{c^2}$
其中 $\rho^2(\bm{b}, \bm{b^{gt}})$ 指的是bbox与gt中心点之间欧氏距离的平方,而 $c$ 指的是最小凸集(即同时包含两个框的最小外接矩形)对角线的长度。
$L_{DIoU} = 1 - IoU + R_{DIoU}$
import math
def getCenterPoint(bbox):
    """Return the center of a ``[x_min, y_min, x_max, y_max]`` box.

    Args:
        bbox: Box indexable as ``[x_min, y_min, x_max, y_max]``.

    Returns:
        A tuple ``(center_x, center_y)`` of floats: the midpoints of the
        box along each axis.
    """
    center_x = (bbox[2] + bbox[0]) / 2.
    center_y = (bbox[3] + bbox[1]) / 2.
    return center_x, center_y
def getDistance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Args:
        point1: Sequence whose first two items are the x and y coordinates.
        point2: Sequence whose first two items are the x and y coordinates.

    Returns:
        The distance as a float.
    """
    # math.hypot avoids the OverflowError that squaring very large
    # coordinate differences by hand would raise.
    return math.hypot(point1[0] - point2[0], point1[1] - point2[1])
def getDIoU(bbox_gt, bbox_pd):
    """Compute IoU and Distance-IoU (DIoU) for two boxes.

    Both boxes are indexable as ``[x_min, y_min, x_max, y_max]``.

    DIoU = IoU - rho^2 / c^2, where rho is the distance between the two
    box centers and c is the diagonal length of the smallest box
    enclosing both boxes. The value returned is this score, not the loss
    ``1 - DIoU``.

    Args:
        bbox_gt: Ground-truth box.
        bbox_pd: Predicted box.

    Returns:
        A tuple ``(IoU, DIoU)``. When the union area is zero IoU is
        reported as ``0.0``, and when the enclosing diagonal is zero the
        distance penalty is taken to be zero, instead of raising
        ``ZeroDivisionError``.
    """
    # Smallest box enclosing both inputs; only its diagonal is needed.
    bbox_cvx = getConvexShape(bbox_gt, bbox_pd)

    # Areas of the two boxes and of their overlap.
    area_bbox_gt = getBboxArea(bbox_gt)
    area_bbox_pd = getBboxArea(bbox_pd)
    area_bbox_inter = getBboxArea(getIntersection(bbox_gt, bbox_pd))

    # Squared center distance, computed directly so no precision is lost
    # to a square root followed by a square.
    center_gt_x, center_gt_y = getCenterPoint(bbox_gt)
    center_pd_x, center_pd_y = getCenterPoint(bbox_pd)
    rho_2 = (center_gt_x - center_pd_x) ** 2 + (center_gt_y - center_pd_y) ** 2

    # Squared diagonal of the enclosing box.
    c_2 = (bbox_cvx[2] - bbox_cvx[0]) ** 2 + (bbox_cvx[3] - bbox_cvx[1]) ** 2

    # Guard the divisions: degenerate inputs would otherwise divide by zero.
    area_union = area_bbox_gt + area_bbox_pd - area_bbox_inter
    IoU = area_bbox_inter / area_union if area_union > 0 else 0.0
    distance_penalty = rho_2 / c_2 if c_2 > 0 else 0.0

    DIoU = IoU - distance_penalty
    return IoU, DIoU
在DIoU的基础上,为了让模型更快更好地收敛,原作者总结了目标检测损失函数应该考虑的三个因素,分别是重叠面积(overlap area)、中心点距离(central point distance)和长宽比(aspect ratio)。
$R_{CIoU} = \alpha v$
其中 $\alpha = \frac{v}{(1-IoU)+v}$
$v = \frac{4}{\pi^2}\left(\arctan\frac{w^{gt}}{h^{gt}} - \arctan\frac{w}{h}\right)^2$
def getWidthAndHeight(bbox):
    """Return the extents of a ``[x_min, y_min, x_max, y_max]`` box.

    Args:
        bbox: Box indexable as ``[x_min, y_min, x_max, y_max]``.

    Returns:
        A tuple ``(width, height)``. The values are plain coordinate
        differences and are not clamped, so an inverted box yields
        negative extents.
    """
    width = bbox[2] - bbox[0]
    height = bbox[3] - bbox[1]
    return width, height
def _getAspectAngle(width, height):
    """Return ``atan(width / height)``, staying defined for zero height."""
    if height == 0:
        # Limit of atan(width / height) as height -> 0+; atan2 also yields
        # 0.0 for a box with zero width and zero height.
        return math.atan2(width, 0.0)
    return math.atan(width / height)


def getCIoU(bbox_gt, bbox_pd):
    """Compute IoU and Complete-IoU (CIoU) for two boxes.

    Both boxes are indexable as ``[x_min, y_min, x_max, y_max]``.

    CIoU = IoU - rho^2 / c^2 - alpha * v, where rho is the distance
    between the box centers, c is the diagonal length of the smallest box
    enclosing both boxes, v measures the aspect-ratio mismatch and
    alpha = v / (1 - IoU + v). The value returned is this score, not the
    loss ``1 - CIoU``.

    Args:
        bbox_gt: Ground-truth box.
        bbox_pd: Predicted box.

    Returns:
        A tuple ``(IoU, CIoU)``. Zero denominators no longer raise
        ``ZeroDivisionError``: IoU is ``0.0`` for a zero union, the
        distance penalty is ``0.0`` for a zero diagonal, and alpha is
        ``0.0`` when ``1 - IoU + v`` is zero (which happens for identical
        boxes, where IoU is 1 and v is 0).
    """
    # Smallest box enclosing both inputs; only its diagonal is needed.
    bbox_cvx = getConvexShape(bbox_gt, bbox_pd)

    # Areas of the two boxes and of their overlap.
    area_bbox_gt = getBboxArea(bbox_gt)
    area_bbox_pd = getBboxArea(bbox_pd)
    area_bbox_inter = getBboxArea(getIntersection(bbox_gt, bbox_pd))

    # Squared center distance, computed directly so no precision is lost
    # to a square root followed by a square.
    center_gt_x, center_gt_y = getCenterPoint(bbox_gt)
    center_pd_x, center_pd_y = getCenterPoint(bbox_pd)
    rho_2 = (center_gt_x - center_pd_x) ** 2 + (center_gt_y - center_pd_y) ** 2

    # Squared diagonal of the enclosing box.
    c_2 = (bbox_cvx[2] - bbox_cvx[0]) ** 2 + (bbox_cvx[3] - bbox_cvx[1]) ** 2

    # Guard the divisions: degenerate inputs would otherwise divide by zero.
    area_union = area_bbox_gt + area_bbox_pd - area_bbox_inter
    IoU = area_bbox_inter / area_union if area_union > 0 else 0.0
    distance_penalty = rho_2 / c_2 if c_2 > 0 else 0.0

    # Aspect-ratio consistency term v.
    w_gt, h_gt = getWidthAndHeight(bbox_gt)
    w_pd, h_pd = getWidthAndHeight(bbox_pd)
    angle_gap = _getAspectAngle(w_gt, h_gt) - _getAspectAngle(w_pd, h_pd)
    ciou_v = 4 / (math.pi ** 2) * angle_gap ** 2

    # Trade-off weight alpha; its denominator is zero for a perfect match.
    alpha_denominator = 1 - IoU + ciou_v
    ciou_alpha = ciou_v / alpha_denominator if alpha_denominator > 0 else 0.0

    CIoU = IoU - distance_penalty - ciou_alpha * ciou_v
    return IoU, CIoU