CenterNet (Objects as Points) open-source code: https://github.com/xingyizhou/CenterNet
The dataset structure in the source code is as follows:
datasets
|
|---dataset           # parses each dataset (shared across the CenterNet tasks)
|   |---coco.py       # COCO dataset
|   |---coco_hp.py    # COCO human pose
|   |---kitti.py      # KITTI
|   |---pascal.py     # Pascal VOC
|
|---sample            # extracts the data each network/task needs
|   |---ctdet.py      # CenterNet detection
|   |---ddd.py        # 3D detection
|   |---exdet.py      # ExtremeNet
|   |---multi_pose.py # human pose
|
|---data_factory.py   # combines dataset and sample into the complete pipeline
This structure is designed to split each step into small, focused pieces. As anyone who has read the paper knows, CenterNet transfers well to object detection, 3D detection, human pose estimation and other tasks, so organizing datasets this way lets us combine components freely, and it also makes it easy to plug in our own dataset.
The rest of this post is a detailed walkthrough; if you just want to use the code without the details, see: https://blog.csdn.net/weixin_43509263/article/details/100799415
My task is object detection with CenterNet on the COCO dataset, so I simplify the file structure and keep only:
datasets
|---dataset
|   |---coco.py
|---sample
|   |---ctdet.py      # CenterNet
|---data_factory.py
'''
In general, to build a Dataset we subclass torch.utils.data.Dataset
and override three methods: __init__, __getitem__ and __len__.
Here, __init__ and __len__ are implemented in dataset, while
__getitem__ is implemented in sample.
'''
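A minimal sketch of this split (toy classes for illustration, not from the repo): one parent provides __init__/__len__, the other provides __getitem__, and multiple inheritance merges them into one working Dataset.
import torch.utils.data as data

class ToyDataset(data.Dataset):  # plays the role of dataset/coco.py
  def __init__(self):
    self.samples = list(range(10))
  def __len__(self):
    return len(self.samples)

class ToySample(data.Dataset):  # plays the role of sample/ctdet.py
  def __getitem__(self, index):
    # relies on self.samples, which the other parent defines
    return self.samples[index] * 2

class Toy(ToyDataset, ToySample):  # what data_factory.py assembles
  pass

ds = Toy()
print(len(ds), ds[3])  # -> 10 6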
"""
对coco数据集进行解析
def __init__(self, opt, split): 解析数据集中各属性
def __len__(self): 返回样本数
def run_eval(self, results, save_dir): eval接口
\-- def save_results(self, results, save_dir): 保存结果
\-- def convert_eval_format(self, all_bboxes): 将自己的结果 转换成coco要求的验证格式
"""
import pycocotools.coco as coco
from pycocotools.cocoeval import COCOeval
import numpy as np
import json
import os
import torch.utils.data as data
class COCO(data.Dataset):
num_classes = 80
default_resolution = [512, 512]
mean = np.array([0.40789654, 0.44719302, 0.47026115],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.28863828, 0.27408164, 0.27809835],
dtype=np.float32).reshape(1, 1, 3)
def __init__(self, opt, split):
    '''
    :param opt: the parsed options object (defined in opts.py)
    :param split: 'train' / 'val' / 'test'
    '''
super(COCO, self).__init__()
    # set self.data_dir, self.img_dir and self.annot_path
self.data_dir = os.path.join(opt.data_dir, 'coco')
self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split))
if split == 'test':
self.annot_path = os.path.join(
        self.data_dir, 'annotations', 'image_info_test-dev2017.json')
else:
      self.annot_path = os.path.join(
        self.data_dir, 'annotations',
        'instances_{}2017.json'.format(split))
    # maximum number of objects kept per image
self.max_objs = 128
    # class names; with '__background__' there are 81 entries
self.class_name = [
'__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
'scissors', 'teddy bear', 'hair drier', 'toothbrush']
    # the 80 valid COCO category ids (not contiguous; some ids are skipped)
self._valid_ids = [
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
37, 38, 39, 40, 41, 42, 43, 44, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63, 64, 65, 67, 70,
72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 84, 85, 86, 87, 88, 89, 90]
    self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}  # map COCO category id -> contiguous index 0-79 (e.g. 1 -> 0, 90 -> 79)
    # per-class colors for visualizing detections
self.voc_color = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32) \
for v in range(1, self.num_classes + 1)]
    # fixed-seed RNG so data augmentation is reproducible
self._data_rng = np.random.RandomState(123)
    # eigenvalues/eigenvectors of the RGB covariance, used for PCA-style (AlexNet-like) color augmentation
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
print('==> initializing coco 2017 {} data.'.format(split))
self.coco = coco.COCO(self.annot_path)
self.images = self.coco.getImgIds() # list: image_id
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
    ''' round to two decimal places '''
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
    ''' convert our detections into the format required by COCO evaluation '''
detections = []
for image_id in all_bboxes:
for cls_ind in all_bboxes[image_id]:
category_id = self._valid_ids[cls_ind - 1]
for bbox in all_bboxes[image_id][cls_ind]:
          bbox[2] -= bbox[0]  # x2 -> w
          bbox[3] -= bbox[1]  # y2 -> h
score = bbox[4]
bbox_out = list(map(self._to_float, bbox[0:4]))
detection = {
"image_id": int(image_id),
"category_id": int(category_id),
"bbox": bbox_out,
"score": float("{:.2f}".format(score))
}
if len(bbox) > 5:
extreme_points = list(map(self._to_float, bbox[5:13]))
detection["extreme_points"] = extreme_points
detections.append(detection)
return detections
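  # For reference, each entry produced above follows the COCO detection
  # result format, e.g. (made-up values):
  #   {"image_id": 42, "category_id": 1, "bbox": [10.5, 20.0, 30.0, 40.0],
  #    "score": 0.91}
  # where "bbox" is [x, y, w, h] in pixels, not [x1, y1, x2, y2].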
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
    ''' save the results as a json file in the required format '''
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
    ''' evaluation entry point '''
self.save_results(results, save_dir)
coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir))
coco_eval = COCOeval(self.coco, coco_dets, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
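"""
sample/ctdet.py: builds the training/validation sample for CenterNet detection
def __getitem__(self, index): reads one image, applies augmentation, and
    produces the input tensor plus the ground-truth heatmap and regression targets
"""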
import torch.utils.data as data
import numpy as np
import torch
import json
import cv2
import os
from utils.image import flip, color_aug
from utils.image import get_affine_transform, affine_transform
from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian
from utils.image import draw_dense_reg
import math
class CTDetDataset(data.Dataset):
  def _coco_box_to_bbox(self, box):
    ''' convert COCO's [x, y, w, h] box into [x1, y1, x2, y2] '''
    bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
dtype=np.float32)
return bbox
  def _get_border(self, border, size):
    ''' shrink the border until a crop center can be sampled inside
        [border // i, size - border // i); e.g. (128, 512) -> 128, (128, 200) -> 64 '''
    i = 1
while size - border // i <= border // i:
i *= 2
return border // i
def __getitem__(self, index):
img_id = self.images[index]
file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
img_path = os.path.join(self.img_dir, file_name)
ann_ids = self.coco.getAnnIds(imgIds=[img_id])
anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)  # cap objects per image at max_objs
img = cv2.imread(img_path)
height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)  # image center
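    # network input size: keep_res pads the original resolution up to a
    # multiple of (opt.pad + 1); otherwise the image is resized to the fixed
    # opt.input_h x opt.input_w. s is the reference scale (the longer side).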
if self.opt.keep_res:
input_h = (height | self.opt.pad) + 1
input_w = (width | self.opt.pad) + 1
s = np.array([input_w, input_h], dtype=np.float32)
else:
s = max(img.shape[0], img.shape[1]) * 1.0
input_h, input_w = self.opt.input_h, self.opt.input_w
flipped = False
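    # training-time augmentation: random crop (the default) or random
    # scale + shift, plus a random horizontal flip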
if self.split == 'train':
if not self.opt.not_rand_crop:
s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
w_border = self._get_border(128, img.shape[1])
h_border = self._get_border(128, img.shape[0])
c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
else:
sf = self.opt.scale
cf = self.opt.shift
c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
if np.random.random() < self.opt.flip:
flipped = True
img = img[:, ::-1, :]
c[0] = width - c[0] - 1
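    # warp the image to the input resolution, normalize, apply optional
    # color augmentation, and reorder HWC -> CHW for PyTorch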
trans_input = get_affine_transform(
c, s, 0, [input_w, input_h])
inp = cv2.warpAffine(img, trans_input,
(input_w, input_h),
flags=cv2.INTER_LINEAR)
inp = (inp.astype(np.float32) / 255.)
if self.split == 'train' and not self.opt.no_color_aug:
color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
inp = (inp - self.mean) / self.std
inp = inp.transpose(2, 0, 1)
output_h = input_h // self.opt.down_ratio
output_w = input_w // self.opt.down_ratio
num_classes = self.num_classes
trans_output = get_affine_transform(c, s, 0, [output_w, output_h])
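    # ground-truth targets on the down-sampled output map:
    #   hm:       per-class center heatmap
    #   wh:       box width/height for each object slot
    #   dense_wh: dense per-pixel wh map (only used with opt.dense_wh)
    #   reg:      sub-pixel offset of each center (ct - int(ct))
    #   ind:      flattened index of each center (y * output_w + x)
    #   reg_mask: marks which of the max_objs slots hold a real object
    #   cat_spec_*: class-specific wh variants (only with opt.cat_spec_wh)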
hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
wh = np.zeros((self.max_objs, 2), dtype=np.float32)
dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
reg = np.zeros((self.max_objs, 2), dtype=np.float32)
ind = np.zeros((self.max_objs), dtype=np.int64)
reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)
draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
draw_umich_gaussian
gt_det = []
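    # for each object: map its box onto the output map, draw a gaussian
    # around the center on the heatmap, and fill the regression targets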
for k in range(num_objs):
ann = anns[k]
bbox = self._coco_box_to_bbox(ann['bbox'])
cls_id = int(self.cat_ids[ann['category_id']])
if flipped:
bbox[[0, 2]] = width - bbox[[2, 0]] - 1
bbox[:2] = affine_transform(bbox[:2], trans_output)
bbox[2:] = affine_transform(bbox[2:], trans_output)
bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
if h > 0 and w > 0:
radius = gaussian_radius((math.ceil(h), math.ceil(w)))
radius = max(0, int(radius))
radius = self.opt.hm_gauss if self.opt.mse_loss else radius
ct = np.array(
[(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
ct_int = ct.astype(np.int32)
draw_gaussian(hm[cls_id], ct_int, radius)
wh[k] = 1. * w, 1. * h
ind[k] = ct_int[1] * output_w + ct_int[0]
reg[k] = ct - ct_int
reg_mask[k] = 1
cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
if self.opt.dense_wh:
draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])
ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh}
if self.opt.dense_wh:
hm_a = hm.max(axis=0, keepdims=True)
dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
del ret['wh']
elif self.opt.cat_spec_wh:
ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
del ret['wh']
if self.opt.reg_offset:
ret.update({'reg': reg})
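    # outside training (or when debugging), also return the transform
    # parameters and raw ground truth for evaluation / visualization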
if self.opt.debug > 0 or not self.split == 'train':
gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
np.zeros((1, 6), dtype=np.float32)
meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
ret['meta'] = meta
return ret
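"""
data_factory.py: picks a dataset class and a sample class by name and
merges them into one complete Dataset via multiple inheritance
"""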
# from .sample.ddd import DddDataset
# from .sample.exdet import EXDetDataset
from .sample.ctdet import CTDetDataset
# from .sample.multi_pose import MultiPoseDataset
from .dataset.coco import COCO
# name -> class; the desired class is selected by the name passed in
data_factory = {
'coco': COCO,
# 'pascal': PascalVOC,
# 'kitti': KITTI,
# 'coco_hp': COCOHP
}
_sample_factory = {
# 'exdet': EXDetDataset, # ExtremeNet Detector.
'ctdet': CTDetDataset, # CenterNet Detector
# 'ddd': DddDataset, # 3D Detection
# 'multi_pose': MultiPoseDataset # human pose
}
def get_dataset(dataset, task):
  '''
  Dataset inherits from both a data_factory class and a _sample_factory class,
  so it can call methods of either parent, forming one complete dataset.
  In general, to build a Dataset we subclass torch.utils.data.Dataset and
  override __init__, __getitem__ and __len__; here __init__ and __len__ are
  implemented in dataset, while __getitem__ is implemented in sample.
  :param dataset: dataset name
  :param task: network / target task
  :return: the combined Dataset class
  '''
  class Dataset(data_factory[dataset], _sample_factory[task]):
pass
return Dataset
''' usage (opt is the parsed options object from opts.py; torch must be imported) '''
Dataset = get_dataset('coco', 'ctdet')
val_loader = torch.utils.data.DataLoader(
Dataset(opt, 'val'),
batch_size=1,
shuffle=False,
num_workers=1,
pin_memory=True
)
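A quick way to sanity-check what the loader yields (a sketch; the exact keys depend on the opt flags, see the branches in __getitem__ above):
for batch in val_loader:
  # with default flags: 'input' (1, 3, H, W), 'hm' (1, 80, H/4, W/4),
  # 'reg_mask', 'ind' and 'wh', plus 'reg' when opt.reg_offset is set
  # and 'meta' on non-train splits
  print({k: v.shape for k, v in batch.items() if hasattr(v, 'shape')})
  break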