The COCO dataset:
{
"info": info,
"licenses": [license],
"images": [image],
"annotations": [annotation],
"categories": [category]
}
info: dataset information
licenses: licensing information
categories: supercategory / subcategory (80 classes)
images: 40504
annotations: 291875
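These counts can be checked with pycocotools directly; a minimal sketch, assuming the val2014 annotation file sits at the (hypothetical) path below:

from pycocotools.coco import COCO

# hypothetical local path to the val2014 instance annotations
coco = COCO('annotations/instances_val2014.json')
print(len(coco.getImgIds()))  # 40504 images
print(len(coco.getAnnIds()))  # 291875 annotations
print(len(coco.getCatIds()))  # 80 categories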
coco.py defines the following API:
COCO - class that loads a COCO annotation file and prepares the required data structures.
decodeMask - Decode binary mask M encoded via run-length encoding.
encodeMask - Encode binary mask M using run-length encoding.
getAnnIds - Get the ids of the annotations that satisfy the given filter conditions.
getCatIds - Get the ids of the categories that satisfy the given filter conditions.
getImgIds - Get the ids of the images that satisfy the given filter conditions.
loadAnns - Load the annotations with the specified ids.
loadCats - Load the categories with the specified ids.
loadImgs - Load the images with the specified ids.
annToMask - Convert a segmentation annotation to a binary mask.
showAnns - Display the specified annotations on the corresponding image.
loadRes - Load algorithm results and create an API for accessing them.
download - Download the COCO image data from the mscoco.org server.
The point of this indexing is: given an image id, all of its annotations can be found quickly; and given a category id, all images containing that category can be found quickly.
def createIndex(self):
    """
    Build the index; called as part of the constructor.
    """
    # create index
    print('creating index...')
    anns, cats, imgs = {}, {}, {}
    # imgToAnns: maps an image id to its annotations; given an image id, all of its annotations can be found
    # catToImgs: maps a category id to image ids; given a category id, all images of that category can be found
    imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
    if 'annotations' in self.dataset:
        for ann in self.dataset['annotations']:
            imgToAnns[ann['image_id']].append(ann)
            anns[ann['id']] = ann
    if 'images' in self.dataset:
        for img in self.dataset['images']:
            imgs[img['id']] = img
    if 'categories' in self.dataset:
        for cat in self.dataset['categories']:
            cats[cat['id']] = cat
    if 'annotations' in self.dataset and 'categories' in self.dataset:
        for ann in self.dataset['annotations']:
            catToImgs[ann['category_id']].append(ann['image_id'])
    print('index created!')
    # assign the results to class members
    self.anns = anns
    self.imgs = imgs
    self.cats = cats
    self.imgToAnns = imgToAnns
    self.catToImgs = catToImgs
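With this index in place, the two lookups described above become one-liners through the public API; a small sketch (the image id and the 'person' category are illustrative, and coco is a COCO instance as in the earlier snippet):

# given an image id, quickly find all of its annotations
ann_ids = coco.getAnnIds(imgIds=[391895])
anns = coco.loadAnns(ann_ids)

# given a category id, quickly find all images containing that category
cat_ids = coco.getCatIds(catNms=['person'])
img_ids = coco.getImgIds(catIds=cat_ids)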
On the evaluation metrics:
Overview:
https://m.oldpan.me/archives/understand-coco-metric
Pixel Accuracy
Measures whether objects are detected at all; the key criterion is simply whether the object was found.
Pixel Precision
Measures how precisely the prediction covers the detected objects, i.e. whether the mask accurately covers the object.
Pixel precision is evaluated only on what has already been detected (regardless of whether the detection itself was correct).
The IoU concept:
https://oldpan.me/archives/iu-iou-intersection-over-union-python
Semantic segmentation:
https://blog.csdn.net/u014593748/article/details/71698246?fps=1&locationNum=5
Detection / classification:
https://oldpan.me/archives/understand-coco-metric
https://blog.csdn.net/Gentleman_Qin/article/details/84519388
https://zhuanlan.zhihu.com/p/33273532
https://blog.csdn.net/syoung9029/article/details/56276567#t0
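To make these mask-level notions concrete, here is a toy numpy sketch of my own (not taken from the links above): pixel accuracy compares all pixels, pixel precision only looks at the predicted pixels, and IoU is intersection over union:

import numpy as np

gt   = np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=bool)  # ground-truth mask
pred = np.array([[1, 1, 1], [1, 0, 0], [0, 0, 0]], dtype=bool)  # predicted mask

pixel_accuracy  = (pred == gt).mean()                    # 7/9: fraction of pixels that agree
pixel_precision = (pred & gt).sum() / pred.sum()         # 3/4: correct among predicted pixels
iou             = (pred & gt).sum() / (pred | gt).sum()  # 3/5: intersection over union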
The full COCO dataset is too large for quick experiments: val2014 alone has 40504 images. Since we only want a small test, we modify the code to load just a subset:
mmdetection/mmdet/datasets/coco.py
def load_annotations(self, ann_file):
    self.coco = COCO(ann_file)
    self.cat_ids = self.coco.getCatIds()
    self.cat2label = {
        cat_id: i + 1
        for i, cat_id in enumerate(self.cat_ids)
    }
    self.img_ids = self.coco.getImgIds()
    img_infos = []
    # original loop over all images:
    # for i in self.img_ids:
    #     info = self.coco.loadImgs([i])[0]
    #     info['filename'] = info['file_name']
    #     img_infos.append(info)
    for key, i in enumerate(self.img_ids):
        if key == 10:  # keep only the first 10 images
            break
        info = self.coco.loadImgs([i])[0]
        info['filename'] = info['file_name']
        img_infos.append(info)
    return img_infos
Here enumerate is used to cap how many images get loaded; we keep only 10. (Slicing self.img_ids[:10] would be equivalent.)
Next, let's look at the output after the images pass through the network:
mmdetection/tools/test.py
def single_gpu_test(model, data_loader, show=False, ids=None, groundtruth=None, scale_test=None):
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=not show, **data)
mmdetection/mmdet/models/detectors/base.py
def show_result(self,
                data,
                result,
                img_norm_cfg,
                dataset=None,
                score_thr=0.3,
                ids=None,
                groundtruth=None,
                scale_test=None,
                ori_hW=None):
    if isinstance(result, tuple):
        bbox_result, segm_result = result
    else:
        bbox_result, segm_result = result, None
Both bbox_result and segm_result are lists indexed 0-79, i.e. one entry per COCO class (80 in total).
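A hedged inspection sketch (assuming result came from a Mask R-CNN style model, so it is a (bbox_result, segm_result) tuple):

bbox_result, segm_result = result
print(len(bbox_result))         # 80: one entry per class
print(bbox_result[0].shape)     # e.g. (10, 5): ten detections of class 0
print(type(segm_result[0][0]))  # dict: an RLE-encoded mask per detection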
Taking bbox as an example, each class can hold many boxes (or masks).
Visualizing the first class gives an array of shape (10, 5): five attributes per box.
From mmcv/visualization/image.py we can tell these are top-left x, top-left y, bottom-right x, bottom-right y, and the confidence score:
for bbox, label in zip(bboxes, labels):
    bbox_int = bbox.astype(np.int32)
    left_top = (bbox_int[0], bbox_int[1])
    right_bottom = (bbox_int[2], bbox_int[3])
    cv2.rectangle(
        img, left_top, right_bottom, bbox_color, thickness=thickness)
    label_text = class_names[
        label] if class_names is not None else 'cls {}'.format(label)
    if len(bbox) > 4:
        label_text += '|{:.02f}'.format(bbox[-1])
    cv2.putText(img, label_text, (bbox_int[0], bbox_int[1] - 2),
                cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color)
All per-class results are stacked into a single array:
bboxes = np.vstack(bbox_result)
Each row of the stacked array gets its corresponding label:
labels = [
    np.full(bbox.shape[0], i, dtype=np.int32)
    for i, bbox in enumerate(bbox_result)
]
labels = np.concatenate(labels)
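A minimal sketch of what these two steps produce, on a hypothetical two-class bbox_result:

import numpy as np

# hypothetical result: two boxes of class 0, one box of class 1
bbox_result = [
    np.array([[0., 0., 10., 10., 0.9], [5., 5., 20., 20., 0.4]]),
    np.array([[3., 3., 8., 8., 0.7]]),
]
bboxes = np.vstack(bbox_result)  # shape (3, 5)
labels = np.concatenate([
    np.full(b.shape[0], i, dtype=np.int32)
    for i, b in enumerate(bbox_result)
])                               # array([0, 0, 1], dtype=int32)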
The confidence threshold is score_thr=0.3, and only detections above it are drawn:
if score_thr > 0:
    assert bboxes.shape[1] == 5
    scores = bboxes[:, -1]
    inds = scores > score_thr
    bboxes = bboxes[inds, :]
    labels = labels[inds]

bbox_color = color_val(bbox_color)
text_color = color_val(text_color)

for bbox, label in zip(bboxes, labels):
    bbox_int = bbox.astype(np.int32)
    left_top = (bbox_int[0], bbox_int[1])
    right_bottom = (bbox_int[2], bbox_int[3])
    cv2.rectangle(
        img, left_top, right_bottom, bbox_color, thickness=thickness)
    label_text = class_names[
        label] if class_names is not None else 'cls {}'.format(label)
    if len(bbox) > 4:
        label_text += '|{:.02f}'.format(bbox[-1])
    cv2.putText(img, label_text, (bbox_int[0], bbox_int[1] - 2),
                cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color)
Now the segm_result branch:
if segm_result is not None:
    segms = mmcv.concat_list(segm_result)
    inds = np.where(bboxes[:, -1] > score_thr)[0]
    for i in inds:
        color_mask = np.random.randint(
            0, 256, (1, 3), dtype=np.uint8)
        mask = maskUtils.decode(segms[i]).astype(np.bool)
        img_show[mask] = img_show[mask] * 0.5 + color_mask * 0.5
The 0.3 score threshold again decides which masks and boxes get shown. For each mask above the threshold, a random color is generated (each instance gets its own color, since this is instance segmentation):
color_mask = np.random.randint(0, 256, (1, 3), dtype=np.uint8)
The segmentation returned here is run-length encoded, so it has to be decoded with pycocotools:
mask = maskUtils.decode(segms[i]).astype(np.bool)
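As a small round-trip sketch of the RLE utilities in pycocotools.mask (the toy mask is my own):

import numpy as np
import pycocotools.mask as maskUtils

mask = np.zeros((4, 4), dtype=np.uint8)
mask[1:3, 1:3] = 1

rle = maskUtils.encode(np.asfortranarray(mask))  # RLE dict: {'size': ..., 'counts': ...}
decoded = maskUtils.decode(rle)                  # back to the binary mask
assert (decoded == mask).all()
print(maskUtils.area(rle))    # 4: number of foreground pixels
print(maskUtils.toBbox(rle))  # [1. 1. 2. 2.]: enclosing box in xywh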
Back in mmdetection/tools/test.py:
def single_gpu_test(model, data_loader, show=False, ids=None, groundtruth=None, scale_test=None):
It collects and returns the result set we need in results; here, results for 10 images in total.
The first image is COCO_val2014_000000391895.jpg.
The test was run with the following arguments (to tools/test.py):
configs/mask_rcnn_r50_fpn_1x.py checkpoints/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth --eval bbox segm --out temp_pred/maskrcnn_out.pkl
This evaluates both bbox and segm. The results are first saved to JSON:
def results2json(dataset, results, out_file):
    result_files = dict()
    if isinstance(results[0], list):
        json_results = det2json(dataset, results)
        result_files['bbox'] = '{}.{}.json'.format(out_file, 'bbox')
        result_files['proposal'] = '{}.{}.json'.format(out_file, 'bbox')
        mmcv.dump(json_results, result_files['bbox'])
    elif isinstance(results[0], tuple):
        json_results = segm2json(dataset, results)
        result_files['bbox'] = '{}.{}.json'.format(out_file, 'bbox')
        result_files['proposal'] = '{}.{}.json'.format(out_file, 'bbox')
        result_files['segm'] = '{}.{}.json'.format(out_file, 'segm')
        mmcv.dump(json_results[0], result_files['bbox'])
        mmcv.dump(json_results[1], result_files['segm'])
    elif isinstance(results[0], np.ndarray):
        json_results = proposal2json(dataset, results)
        result_files['proposal'] = '{}.{}.json'.format(out_file, 'proposal')
        mmcv.dump(json_results, result_files['proposal'])
    else:
        raise TypeError('invalid type of results')
    return result_files
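For reference, the dumped JSON follows the standard COCO results format; a bbox record and a segm record look roughly like this (all values illustrative):

bbox_record = {
    'image_id': 391895,
    'category_id': 1,
    'bbox': [100.0, 50.0, 80.0, 120.0],  # [x, y, width, height]
    'score': 0.98,
}
segm_record = {
    'image_id': 391895,
    'category_id': 1,
    'segmentation': {'size': [360, 640], 'counts': '...'},  # RLE
    'score': 0.98,
}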
Then we enter:
coco_eval(result_files, eval_types, dataset.coco)
def coco_eval(result_files, result_types, coco, max_dets=(100, 300, 1000)):
    for res_type in result_types:
        assert res_type in [
            'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'
        ]

    if mmcv.is_str(coco):
        coco = COCO(coco)
    assert isinstance(coco, COCO)

    if result_types == ['proposal_fast']:
        ar = fast_eval_recall(result_files, coco, np.array(max_dets))
        for i, num in enumerate(max_dets):
            print('AR@{}\t= {:.4f}'.format(num, ar[i]))
        return

    for res_type in result_types:
        result_file = result_files[res_type]
        assert result_file.endswith('.json')

        coco_dets = coco.loadRes(result_file)
        img_ids = coco.getImgIds()
        iou_type = 'bbox' if res_type == 'proposal' else res_type
        cocoEval = COCOeval(coco, coco_dets, iou_type)
        cocoEval.params.imgIds = img_ids
        if res_type == 'proposal':
            cocoEval.params.useCats = 0
            cocoEval.params.maxDets = list(max_dets)
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
coco_dets now holds the results for our 10 images: 708 records in total (bbox + segm).
The evaluation iterates over the result types:
for res_type in result_types:
loads the results:
coco_dets = coco.loadRes(result_file)
builds the evaluator:
from pycocotools.cocoeval import COCOeval
cocoEval = COCOeval(coco, coco_dets, iou_type)
and then runs:
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
I didn't fully understand this part at first, so I just ran it once, and the scores came out very low. It turns out the evaluation is judged against the full ground-truth annotations loaded via COCO(json): we only produced predictions for 10 images, but AP/AR are averaged over all images in the annotation file, so the numbers end up near zero. Better to use a correspondingly reduced dataset of your own than to truncate to 10 images this way.
DONE (t=46.13s).
Accumulating evaluation results...
DONE (t=6.32s).
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.002
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.003
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.002
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.001
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.001
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.001
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.000
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.000
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.000
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *segm*
DONE (t=49.32s).
Accumulating evaluation results...
DONE (t=6.34s).
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.002
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.003
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.002
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.001
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.001
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.001
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.000
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.000
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.000
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
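One plausible fix, sketched under the assumption that we keep the 10-image truncation: restrict cocoEval.params.imgIds to the image ids we actually ran inference on, instead of the full coco.getImgIds() list, so that AP/AR is averaged only over those images:

# hypothetical fix: evaluate only the images we actually predicted on
eval_img_ids = dataset.img_ids[:10]    # the same 10 ids kept in load_annotations
cocoEval = COCOeval(coco, coco_dets, iou_type)
cocoEval.params.imgIds = eval_img_ids  # instead of coco.getImgIds()
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()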
Anyway, let's keep walking through the pipeline.
First, cocoEval.evaluate():
def evaluate(self):
    '''
    Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
    :return: None
    '''
    tic = time.time()
    print('Running per image evaluation...')
    p = self.params
    # add backward compatibility if useSegm is specified in params
    if not p.useSegm is None:
        p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
        print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
    print('Evaluate annotation type *{}*'.format(p.iouType))
    p.imgIds = list(np.unique(p.imgIds))
    if p.useCats:
        p.catIds = list(np.unique(p.catIds))
    p.maxDets = sorted(p.maxDets)
    self.params = p

    self._prepare()
    # loop through images, area range, max detection number
    catIds = p.catIds if p.useCats else [-1]

    if p.iouType == 'segm' or p.iouType == 'bbox':
        computeIoU = self.computeIoU
    elif p.iouType == 'keypoints':
        computeIoU = self.computeOks
    self.ious = {(imgId, catId): computeIoU(imgId, catId) \
                 for imgId in p.imgIds
                 for catId in catIds}

    evaluateImg = self.evaluateImg
    maxDet = p.maxDets[-1]
    self.evalImgs = [evaluateImg(imgId, catId, areaRng, maxDet)
                     for catId in catIds
                     for areaRng in p.areaRng
                     for imgId in p.imgIds
                     ]
    self._paramsEval = copy.deepcopy(self.params)
    toc = time.time()
    print('DONE (t={:0.2f}s).'.format(toc-tic))
Inside evaluate(), self._prepare() fetches the ground truth and the detections:
self._prepare()
if p.useCats:
    gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
    dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
else:
    gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
    dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))
References:
(1) COCO Python API - Usage
https://blog.csdn.net/gzj2013/article/details/82385164
(2) COCO Python API - Source Code Analysis
https://blog.csdn.net/gzj2013/article/details/82421166
(3) COCO Python API - Class Distribution of the Dataset
https://blog.csdn.net/gzj2013/article/details/82425954
COCO Object Detection Evaluation Metrics
https://www.jianshu.com/p/d7a06a720a2b
Computing the IoU:
self.ious = {(imgId, catId): computeIoU(imgId, catId)
             for imgId in p.imgIds
             for catId in catIds}
self.ious maps each (imgId, catId) pair to an m x n matrix of IoUs between the m detections and the n ground-truth objects for that image and category. The IoU computation itself isn't visible here in Python; it is written in C (exposed through Cython):
https://files.pythonhosted.org/packages/96/84/9a07b1095fd8555ba3f3d519517c8743c2554a245f9476e5e39869f948d2/pycocotools-2.0.0.tar.gz
_mask.pyx
# iou computation. support function overload (RLEs-RLEs and bbox-bbox).
def iou( dt, gt, pyiscrowd ):
    def _preproc(objs):
        if len(objs) == 0:
            return objs
        if type(objs) == np.ndarray:
            if len(objs.shape) == 1:
                objs = objs.reshape((objs[0], 1))
            # check if it's Nx4 bbox
            if not len(objs.shape) == 2 or not objs.shape[1] == 4:
                raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension')
            objs = objs.astype(np.double)
        elif type(objs) == list:
            # check if list is in box format and convert it to np.ndarray
            isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs]))
            isrle = np.all(np.array([type(obj) == dict for obj in objs]))
            if isbox:
                objs = np.array(objs, dtype=np.double)
                if len(objs.shape) == 1:
                    objs = objs.reshape((1,objs.shape[0]))
            elif isrle:
                objs = _frString(objs)
            else:
                raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])')
        else:
            raise Exception('unrecognized type. The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.')
        return objs
    def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou):
        rleIou( <RLE*> dt._R, <RLE*> gt._R, m, n, <byte*> iscrowd.data, <double*> _iou.data )
    def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou):
        bbIou( <BB> dt.data, <BB> gt.data, m, n, <byte*> iscrowd.data, <double*> _iou.data )
    def _len(obj):
        cdef siz N = 0
        if type(obj) == RLEs:
            N = obj.n
        elif len(obj)==0:
            pass
        elif type(obj) == np.ndarray:
            N = obj.shape[0]
        return N
    # convert iscrowd to numpy array
    cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8)
    # simple type checking
    cdef siz m, n
    dt = _preproc(dt)
    gt = _preproc(gt)
    m = _len(dt)
    n = _len(gt)
    if m == 0 or n == 0:
        return []
    if not type(dt) == type(gt):
        raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray')

    # define local variables
    cdef double* _iou = <double*> 0
    cdef np.npy_intp shape[1]
    # check type and assign iou function
    if type(dt) == RLEs:
        _iouFun = _rleIou
    elif type(dt) == np.ndarray:
        _iouFun = _bbIou
    else:
        raise Exception('input data type not allowed.')
    _iou = <double*> malloc(m*n* sizeof(double))
    iou = np.zeros((m*n, ), dtype=np.double)
    shape[0] = <np.npy_intp> m*n
    iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou)
    PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA)
    _iouFun(dt, gt, iscrowd, m, n, iou)
    return iou.reshape((m,n), order='F')
maskApi.c:
void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) {
  siz g, d; BB db, gb; int crowd;
  db=malloc(sizeof(double)*m*4); rleToBbox(dt,db,m);
  gb=malloc(sizeof(double)*n*4); rleToBbox(gt,gb,n);
  bbIou(db,gb,m,n,iscrowd,o); free(db); free(gb);
  for( g=0; g<n; g++ ) for( d=0; d<m; d++ ) if(o[g*m+d]>0) {
    crowd=iscrowd!=NULL && iscrowd[g];
    if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; }
    siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb;
    ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0;
    cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1;
    while( ct>0 ) {
      c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0;
      ca-=c; if(!ca && a<ka) { ca=dt[d].cnts[a++]; va=!va; } ct+=ca;
      cb-=c; if(!cb && b<kb) { cb=gt[g].cnts[b++]; vb=!vb; } ct+=cb;
    }
    if(i==0) u=1; else if(crowd) rleArea(dt+d,1,&u);
    o[g*m+d] = (double)i/(double)u;
  }
}
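Note that rleIou first calls bbIou on the masks' bounding boxes as a cheap prefilter, and only refines the positive pairs by walking both RLEs. The bbox computation itself reduces to a simple formula; here is a hedged numpy sketch of what bbIou (also in maskApi.c) does, with boxes in [x, y, w, h] and crowd GT regions using the detection's own area as the denominator:

import numpy as np

def bb_iou(dt, gt, iscrowd):
    """Pairwise IoU between m detections and n GTs, all boxes as [x, y, w, h]."""
    m, n = len(dt), len(gt)
    o = np.zeros((m, n), dtype=np.double)
    for g in range(n):
        gx, gy, gw, gh = gt[g]
        ga = gw * gh
        for d in range(m):
            dx, dy, dw, dh = dt[d]
            da = dw * dh
            w = min(dx + dw, gx + gw) - max(dx, gx)
            h = min(dy + dh, gy + gh) - max(dy, gy)
            if w <= 0 or h <= 0:
                continue  # no overlap
            inter = w * h
            # crowd GT: intersection over the detection's own area
            union = da if iscrowd[g] else da + ga - inter
            o[d, g] = inter / union
    return o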
From this code we can roughly see that for segmentation, the IoU is computed between the whole encoded masks, producing an m x n matrix of pairwise detection-vs-GT comparisons. Some details are still unclear, though: an image can contain many classes, each class can have many boxes, and the ground truth likewise.
So how exactly are detections matched to ground-truth objects when computing the IoU?