PV-RCNN: paper, code
demo.py and test.py are both used to evaluate the model after training. The main differences are that demo.py only runs on a single sample while test.py runs on many, and demo.py outputs a visualization while test.py only outputs numbers. Let's start reading from main() in demo.py:
def main():
    args, cfg = parse_config()
    demo_dataset = DemoDataset(
        dataset_cfg=cfg.DATA_CONFIG, class_names=cfg.CLASS_NAMES, training=False,
        root_path=Path(args.data_path), ext=args.ext, logger=logger
    )
The cfg parameters come from tools/cfgs/kitti_models/pv_rcnn.yaml. The args parameters are defined in parse_config() and include:
1. '--cfg_file'   # specifies the config file
2. '--data_path'  # specifies the point cloud data file or directory
3. '--ckpt'       # specifies the pretrained model
4. '--ext'        # specifies the extension of the point cloud data files
These arguments must be given on the command line, otherwise the default values are used.
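As a rough sketch of what parse_config() amounts to, assuming it is built on argparse (the argument names come from the list above, the defaults here are placeholders rather than the repository's actual values, and the real function additionally loads the YAML file into the global cfg object):

import argparse

def parse_config():
    parser = argparse.ArgumentParser(description='arg parser')
    parser.add_argument('--cfg_file', type=str, default='cfgs/kitti_models/pv_rcnn.yaml',
                        help='specify the config for the demo')
    parser.add_argument('--data_path', type=str, default='demo_data',
                        help='specify the point cloud data file or directory')
    parser.add_argument('--ckpt', type=str, default=None,
                        help='specify the pretrained model')
    parser.add_argument('--ext', type=str, default='.bin',
                        help='specify the extension of the point cloud data file')
    args = parser.parse_args()
    return args   # the real parse_config() also returns the populated cfg object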
The example invocation given on GitHub:
python demo.py --cfg_file cfgs/kitti_models/pv_rcnn.yaml \
    --ckpt pv_rcnn_8369.pth \
    --data_path ${POINT_CLOUD_DATA}
# Here ${POINT_CLOUD_DATA} could be the following format:
# The original KITTI .bin data within data/kitti, like data/kitti/training/velodyne/000008.bin
Back to main() in demo.py:
model = build_network(model_cfg=cfg.MODEL, num_class=len(cfg.CLASS_NAMES), dataset=demo_dataset)
Inside build_network there is only a single call to build_detector. The definition of build_detector:
__all__ = {
    'Detector3DTemplate': Detector3DTemplate,
    'SECONDNet': SECONDNet,
    'PartA2Net': PartA2Net,
    'PVRCNN': PVRCNN,
    'PointPillar': PointPillar
}

def build_detector(model_cfg, num_class, dataset):
    model = __all__[model_cfg.NAME](
        model_cfg=model_cfg, num_class=num_class, dataset=dataset
    )
    return model
The classes registered here come from the individual .py files under pcdet/models/detectors; their functions will be covered when we get to them.
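To make the lookup concrete, here is a toy, self-contained version of the same registry pattern (not the actual pcdet code): the class is picked out of a dict by the NAME string from the config and then instantiated.

# Toy registry illustration (not the real pcdet code)
class PVRCNN:
    def __init__(self, model_cfg, num_class, dataset):
        self.model_cfg, self.num_class, self.dataset = model_cfg, num_class, dataset

__all__ = {'PVRCNN': PVRCNN}

def build_detector(model_cfg, num_class, dataset):
    # look the class up by its name string, then instantiate it
    return __all__[model_cfg['NAME']](model_cfg=model_cfg, num_class=num_class, dataset=dataset)

model = build_detector({'NAME': 'PVRCNN'}, num_class=3, dataset=None)
print(type(model).__name__)   # PVRCNN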
Continuing back in main():
model.load_params_from_file(filename=args.ckpt, logger=logger, to_cpu=True)
model.cuda()
model.eval()
load_params_from_file(), as the name suggests, loads the model weights from the checkpoint file. cuda() moves the model to the GPU, and eval() switches it into evaluation mode (dropout is disabled and the stored BatchNorm statistics are used).
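A tiny standalone example (unrelated to the PV-RCNN code itself) of what eval() changes, using a dropout layer:

import torch
import torch.nn as nn

layer = nn.Dropout(p=0.5)
x = torch.ones(1, 8)

layer.train()
print(layer(x))   # roughly half the entries are zeroed, the rest scaled by 2

layer.eval()
print(layer(x))   # identical to the input: dropout is a no-op in eval mode

Continuing in main():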
with torch.no_grad():
    for idx, data_dict in enumerate(demo_dataset):
        data_dict = demo_dataset.collate_batch([data_dict])
torch.no_grad() ensures that no gradients are computed inside the block, so no backpropagation takes place.
Look at the definition of collate_batch in the DemoDataset class:
def collate_batch(batch_list, _unused=False):
    data_dict = defaultdict(list)
The data_dict passed in is received here as batch_list. A new dictionary data_dict is then created as a defaultdict: when a key that does not exist yet is looked up, a default value is returned instead of raising a KeyError.
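A quick standalone illustration of that defaultdict(list) behaviour (toy values, not PV-RCNN data):

from collections import defaultdict

# defaultdict(list) returns an empty list for a missing key instead of
# raising KeyError, so values can be appended without any existence check.
data_dict = defaultdict(list)
data_dict['points'].append([1.0, 2.0, 3.0])   # key created on first access
data_dict['points'].append([4.0, 5.0, 6.0])
print(data_dict['points'])   # [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
print(data_dict['missing'])  # [] -- no KeyError

Back to collate_batch: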
    for cur_sample in batch_list:
        for key, val in cur_sample.items():
            data_dict[key].append(val)
    batch_size = len(batch_list)
    ret = {}
cur_sample is the current sample, itself an item of the DemoDataset class. data_dict records the contents of each cur_sample, essentially building up a list of values per key, as follows:
    for key, val in data_dict.items():
        if key in ['voxels', 'voxel_num_points']:       # voxel data
            ret[key] = np.concatenate(val, axis=0)      # concatenate the arrays
        elif key in ['points', 'voxel_coords']:         # points / voxel coordinates
            coors = []
            for i, coor in enumerate(val):
                coor_pad = np.pad(coor, ((0, 0), (1, 0)), mode='constant', constant_values=i)
                """
                ((0, 0), (1, 0)): pad 0 rows before and 0 rows after along the first
                dimension (rows), and pad 1 column before and 0 columns after along
                the second dimension (columns).
                mode='constant' pads with a constant value, and constant_values=i
                fills the new column with i, the index of the sample in the batch.
                (A small standalone example follows after this function.)
                """
                coors.append(coor_pad)                  # append coor_pad to coors
            ret[key] = np.concatenate(coors, axis=0)
        elif key in ['gt_boxes']:                       # ground-truth boxes
            max_gt = max([len(x) for x in val])         # largest number of gt boxes in the batch
            batch_gt_boxes3d = np.zeros((batch_size, max_gt, val[0].shape[-1]), dtype=np.float32)   # zero-padded array holding every sample's gt boxes
            for k in range(batch_size):
                batch_gt_boxes3d[k, :val[k].__len__(), :] = val[k]
            ret[key] = batch_gt_boxes3d
        else:
            ret[key] = np.stack(val, axis=0)            # like concatenate, but stacks along a new axis
    ret['batch_size'] = batch_size
    return ret
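To see what the np.pad call does, here is a small standalone example (toy numbers, not PV-RCNN data) showing how the batch index i is prepended as an extra first column:

import numpy as np

# ((0, 0), (1, 0)) pads nothing along the rows and prepends one column,
# filled with the constant i, i.e. the index of the sample in the batch.
coor = np.array([[10, 20, 30],
                 [40, 50, 60]])
coor_pad = np.pad(coor, ((0, 0), (1, 0)), mode='constant', constant_values=2)
print(coor_pad)
# [[ 2 10 20 30]
#  [ 2 40 50 60]]
# Every voxel coordinate now carries its batch index in the first column.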
The next line in main() is
load_data_to_gpu(data_dict)
Locating that function:
def load_data_to_gpu(batch_dict):
    for key, val in batch_dict.items():
        if not isinstance(val, np.ndarray):
            # isinstance checks whether an object is of a given type;
            # an ndarray is a multidimensional array of elements of the same type
            continue
        if key in ['frame_id', 'metadata', 'calib', 'image_shape']:
            continue
        batch_dict[key] = torch.from_numpy(val).float().cuda()
As the name suggests, it moves the data onto the GPU.
Back in main():
pred_dicts, _ = model.forward(data_dict)
Locating forward: since the model is an instance of the PVRCNN class, let's look at its __init__ first.
class PVRCNN(Detector3DTemplate):
    def __init__(self, model_cfg, num_class, dataset):
        super().__init__(model_cfg=model_cfg, num_class=num_class, dataset=dataset)
        self.module_list = self.build_networks()
build_networks here is inherited from Detector3DTemplate; locating that function:
def build_networks(self):
    model_info_dict = {
        # parameters
        'module_list': [],                                                                # the modules that get built
        'num_rawpoint_features': self.dataset.point_feature_encoder.num_point_features,  # number of raw point cloud features
        'grid_size': self.dataset.grid_size,                                              # grid size
        'point_cloud_range': self.dataset.point_cloud_range,                              # point cloud range
        'voxel_size': self.dataset.voxel_size                                             # voxel size
    }
    for module_name in self.module_topology:
        # module_topology, inherited from Detector3DTemplate, lists every module of the network
        module, model_info_dict = getattr(self, 'build_%s' % module_name)(
            model_info_dict=model_info_dict
        )   # getattr() returns an attribute of an object by name: here it fetches build_<module_name>, calls it, and assigns the result to module
        self.add_module(module_name, module)   # register the module
    return model_info_dict['module_list']
Let's locate the module_topology list, defined in Detector3DTemplate's __init__:
self.module_topology = [
    # every module of the network; vfe stands for voxel feature encoder, pfe for point feature encoder
    'vfe', 'backbone_3d', 'map_to_bev_module', 'pfe',
    'backbone_2d', 'dense_head', 'point_head', 'roi_head'
]
Each build_<module_name> is a method defined in the same file, detector3d_template.py; most of them just set up parameters, as sketched below.
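To make the getattr dispatch concrete, here is a minimal toy class (not the real Detector3DTemplate) that builds its modules the same way: each method is looked up by name and called for every entry in module_topology.

# Toy illustration of getattr-based dispatch (not the actual pcdet code)
class TinyTemplate:
    module_topology = ['vfe', 'backbone_3d']

    def build_vfe(self, model_info_dict):
        model_info_dict['module_list'].append('VFE module')
        return 'VFE module', model_info_dict

    def build_backbone_3d(self, model_info_dict):
        model_info_dict['module_list'].append('3D backbone')
        return '3D backbone', model_info_dict

    def build_networks(self):
        model_info_dict = {'module_list': []}
        for module_name in self.module_topology:
            # fetch the build_<module_name> method by name and call it
            module, model_info_dict = getattr(self, 'build_%s' % module_name)(
                model_info_dict=model_info_dict)
        return model_info_dict['module_list']

print(TinyTemplate().build_networks())   # ['VFE module', '3D backbone']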
Back to forward:
pred_dicts, _ = model.forward(data_dict)
Locating forward: it passes data_dict forward through the network and returns the prediction results pred_dicts.
def forward(self, batch_dict):
    for cur_module in self.module_list:   # built in __init__
        batch_dict = cur_module(batch_dict)
    if self.training:                     # training mode
        loss, tb_dict, disp_dict = self.get_training_loss()   # key function
        ret_dict = {
            'loss': loss
        }
        return ret_dict, tb_dict, disp_dict
    else:                                 # otherwise, i.e. testing
        pred_dicts, recall_dicts = self.post_processing(batch_dict)   # key function
        return pred_dicts, recall_dicts
The two key functions are get_training_loss and post_processing.
def get_training_loss(self):   # training loss
    disp_dict = {}
    loss_rpn, tb_dict = self.dense_head.get_loss()   # models/dense_heads/anchor_head_template.py, line 206
    loss_point, tb_dict = self.point_head.get_loss(tb_dict)
    loss_rcnn, tb_dict = self.roi_head.get_loss(tb_dict)
    loss = loss_rpn + loss_point + loss_rcnn   # per the paper, the final loss is the sum of these three terms
    return loss, tb_dict, disp_dict
Let's find get_loss(). First we need to know which dense_head is used: in the config file tools/cfgs/kitti_models/pv_rcnn.yaml, DENSE_HEAD (line 29) is set to AnchorHeadSingle. However, get_loss() is not defined in models/dense_heads/anchor_head_single.py; that is because AnchorHeadSingle inherits from AnchorHeadTemplate, and get_loss() can be found in anchor_head_template.py (line 206):
def get_loss(self):
    cls_loss, tb_dict = self.get_cls_layer_loss()          # loss from the classification layer
    box_loss, tb_dict_box = self.get_box_reg_layer_loss()  # loss from the box regression layer
    tb_dict.update(tb_dict_box)
    rpn_loss = cls_loss + box_loss                         # the region proposal loss is the sum of these two
    tb_dict['rpn_loss'] = rpn_loss.item()
    return rpn_loss, tb_dict
get_cls_layer_loss() and get_box_reg_layer_loss() are also defined in anchor_head_template.py.
The other key function, post_processing, is called in forward():
pred_dicts, recall_dicts = self.post_processing(batch_dict)
Locating post_processing():
def post_processing(self, batch_dict):
    """
    Args:
        batch_dict:
            batch_size: size of the batch
            batch_cls_preds: (B, num_boxes, num_classes | 1) or (N1+N2+..., num_classes | 1)  class predictions
            batch_box_preds: (B, num_boxes, 7+C) or (N1+N2+..., 7+C)  box predictions
            cls_preds_normalized: indicate whether batch_cls_preds is normalized
            batch_index: optional (N1+N2+...)
            roi_labels: (B, num_rois), values in 1 .. num_classes
    Returns:
    """
    post_process_cfg = self.model_cfg.POST_PROCESSING   # post-processing config
    batch_size = batch_dict['batch_size']               # batch size
    recall_dict = {}
    pred_dicts = []
    for index in range(batch_size):                     # iterate over the samples in the batch
        if batch_dict.get('batch_index', None) is not None:
            assert batch_dict['batch_cls_preds'].shape.__len__() == 2
            batch_mask = (batch_dict['batch_index'] == index)
        else:
            assert batch_dict['batch_cls_preds'].shape.__len__() == 3
            batch_mask = index
        box_preds = batch_dict['batch_box_preds'][batch_mask]
        cls_preds = batch_dict['batch_cls_preds'][batch_mask]
        src_cls_preds = cls_preds
        src_box_preds = box_preds
        assert cls_preds.shape[1] in [1, self.num_class]
        if not batch_dict['cls_preds_normalized']:       # if not already normalized, normalize with sigmoid
            cls_preds = torch.sigmoid(cls_preds)
        if post_process_cfg.NMS_CONFIG.MULTI_CLASSES_NMS:
            raise NotImplementedError
        else:
            cls_preds, label_preds = torch.max(cls_preds, dim=-1)
            '''
            torch.max returns two tensors: the first holds the maximum score of
            each row (each box), the second holds the index of the class that
            achieved that maximum. (A standalone example follows after this function.)
            '''
            label_preds = batch_dict['roi_labels'][index] if batch_dict.get('has_class_labels', False) else label_preds + 1
            '''
            if batch_dict.get('has_class_labels', False):   # if class labels are available, take them from roi_labels
                label_preds = batch_dict['roi_labels'][index]
            else:                                           # otherwise shift the argmax by 1, since class ids start at 1 (0 is background)
                label_preds = label_preds + 1
            '''
            selected, selected_scores = class_agnostic_nms(   # non-maximum suppression
                box_scores=cls_preds, box_preds=box_preds,     # class scores and predicted boxes
                nms_config=post_process_cfg.NMS_CONFIG,        # NMS config
                score_thresh=post_process_cfg.SCORE_THRESH     # score threshold
            )
            if post_process_cfg.OUTPUT_RAW_SCORE:
                max_cls_preds, _ = torch.max(src_cls_preds, dim=-1)   # maximum of the raw (un-normalized) predictions
                selected_scores = max_cls_preds[selected]
            final_scores = selected_scores                 # final scores
            final_labels = label_preds[selected]           # final labels
            final_boxes = box_preds[selected]              # final boxes
        recall_dict = self.generate_recall_record(
            box_preds=final_boxes if 'rois' not in batch_dict else src_box_preds,
            recall_dict=recall_dict, batch_index=index, data_dict=batch_dict,
            thresh_list=post_process_cfg.RECALL_THRESH_LIST
        )
        '''
        if 'rois' not in batch_dict:    # if there are no rois in the data,
            box_preds = final_boxes     # use the final boxes obtained above
        else:                           # if rois are already in the data,
            box_preds = src_box_preds   # use batch_dict['batch_box_preds'] directly
        '''
        record_dict = {
            # recall is the ratio of correctly detected objects to all objects in the test set
            'pred_boxes': final_boxes,
            'pred_scores': final_scores,
            'pred_labels': final_labels
        }
        pred_dicts.append(record_dict)
    return pred_dicts, recall_dict
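A standalone illustration (toy numbers) of the torch.max(cls_preds, dim=-1) call used above, showing the (values, indices) pair it returns:

import torch

# the highest class score of each box and the index of the class that achieved it
cls_preds = torch.tensor([[0.1, 0.7, 0.2],
                          [0.6, 0.3, 0.9]])
scores, labels = torch.max(cls_preds, dim=-1)
print(scores)   # tensor([0.7000, 0.9000])
print(labels)   # tensor([1, 2])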
post_processing relies on two key functions, class_agnostic_nms and generate_recall_record.
def class_agnostic_nms(box_scores, box_preds, nms_config, score_thresh=None):   # non-maximum suppression
    src_box_scores = box_scores
    if score_thresh is not None:
        scores_mask = (box_scores >= score_thresh)
        box_scores = box_scores[scores_mask]
        box_preds = box_preds[scores_mask]
    selected = []
    if box_scores.shape[0] > 0:
        box_scores_nms, indices = torch.topk(box_scores, k=min(nms_config.NMS_PRE_MAXSIZE, box_scores.shape[0]))
        boxes_for_nms = box_preds[indices]
        keep_idx, selected_scores = getattr(iou3d_nms_utils, nms_config.NMS_TYPE)(
            boxes_for_nms, box_scores_nms, nms_config.NMS_THRESH, **nms_config
        )
        selected = indices[keep_idx[:nms_config.NMS_POST_MAXSIZE]]
    if score_thresh is not None:
        original_idxs = scores_mask.nonzero().view(-1)
        selected = original_idxs[selected]
    return selected, src_box_scores[selected]
def generate_recall_record(box_preds, recall_dict, batch_index, data_dict=None, thresh_list=None):
    if 'gt_boxes' not in data_dict:
        return recall_dict
    rois = data_dict['rois'][batch_index] if 'rois' in data_dict else None   # region of interest
    '''
    if 'rois' in data_dict:                    # if rois are in the data,
        rois = data_dict['rois'][batch_index]  # index into them
    else:                                      # otherwise
        rois = None
    '''
    gt_boxes = data_dict['gt_boxes'][batch_index]   # ground-truth boxes for this sample
    if recall_dict.__len__() == 0:                  # on the first call recall_dict == {}, so initialize it
        recall_dict = {'gt': 0}
        for cur_thresh in thresh_list:              # initialize every threshold counter
            recall_dict['roi_%s' % (str(cur_thresh))] = 0
            recall_dict['rcnn_%s' % (str(cur_thresh))] = 0
    cur_gt = gt_boxes                               # current gt boxes
    k = cur_gt.__len__() - 1                        # start from the last row and walk backwards
    while k > 0 and cur_gt[k].sum() == 0:           # skip zero-padded rows
        k -= 1
    cur_gt = cur_gt[:k + 1]                         # keep only the non-padded part
    if cur_gt.sum() > 0:                            # there are gt boxes
        if box_preds.shape[0] > 0:                  # there are predicted boxes
            iou3d_rcnn = iou3d_nms_utils.boxes_iou3d_gpu(box_preds, cur_gt[:, 0:7])   # 3D IoU between predictions and gt
        else:                                       # otherwise use an empty IoU matrix
            iou3d_rcnn = torch.zeros((0, cur_gt.shape[0]))
        if rois is not None:
            iou3d_roi = iou3d_nms_utils.boxes_iou3d_gpu(rois, cur_gt[:, 0:7])
        for cur_thresh in thresh_list:
            if iou3d_rcnn.shape[0] == 0:
                recall_dict['rcnn_%s' % str(cur_thresh)] += 0
            else:
                rcnn_recalled = (iou3d_rcnn.max(dim=0)[0] > cur_thresh).sum().item()
                recall_dict['rcnn_%s' % str(cur_thresh)] += rcnn_recalled
            if rois is not None:
                roi_recalled = (iou3d_roi.max(dim=0)[0] > cur_thresh).sum().item()
                recall_dict['roi_%s' % str(cur_thresh)] += roi_recalled
        recall_dict['gt'] += cur_gt.shape[0]
    else:
        gt_iou = box_preds.new_zeros(box_preds.shape[0])   # no gt boxes: keep zeros
    return recall_dict
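A small standalone illustration (toy IoU values) of the recall counting above: for each ground-truth box we take its best IoU over all predictions and count it as recalled if that IoU exceeds the threshold.

import torch

iou3d_rcnn = torch.tensor([[0.8, 0.1, 0.0],    # rows: predicted boxes
                           [0.2, 0.6, 0.3]])   # columns: ground-truth boxes
cur_thresh = 0.5
best_iou_per_gt = iou3d_rcnn.max(dim=0)[0]     # tensor([0.8, 0.6, 0.3])
rcnn_recalled = (best_iou_per_gt > cur_thresh).sum().item()
print(rcnn_recalled)   # 2 of the 3 ground-truth boxes are recalled at IoU 0.5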
The last part of main() is draw_scenes():
V.draw_scenes(
    points=data_dict['points'][:, 1:], ref_boxes=pred_dicts[0]['pred_boxes'],
    ref_scores=pred_dicts[0]['pred_scores'], ref_labels=pred_dicts[0]['pred_labels']
)
mlab.show(stop=True)
Locating draw_scenes():
def draw_scenes(points, gt_boxes=None, ref_boxes=None, ref_scores=None, ref_labels=None):
    # called from main(); arguments: points, ground-truth boxes, reference (predicted) boxes, their scores and labels
    if not isinstance(points, np.ndarray):
        points = points.cpu().numpy()
    if ref_boxes is not None and not isinstance(ref_boxes, np.ndarray):
        ref_boxes = ref_boxes.cpu().numpy()
    if gt_boxes is not None and not isinstance(gt_boxes, np.ndarray):
        gt_boxes = gt_boxes.cpu().numpy()
    if ref_scores is not None and not isinstance(ref_scores, np.ndarray):
        ref_scores = ref_scores.cpu().numpy()
    if ref_labels is not None and not isinstance(ref_labels, np.ndarray):
        ref_labels = ref_labels.cpu().numpy()
    fig = visualize_pts(points)                                   # fig is the visualization of the input points
    fig = draw_multi_grid_range(fig, bv_range=(0, -40, 80, 40))   # draw the multi-grid range (bird's-eye view)
    if gt_boxes is not None:
        corners3d = box_utils.boxes_to_corners_3d(gt_boxes)
        fig = draw_corners3d(corners3d, fig=fig, color=(0, 0, 1), max_num=100)
    if ref_boxes is not None:
        ref_corners3d = box_utils.boxes_to_corners_3d(ref_boxes)
        if ref_labels is None:
            fig = draw_corners3d(ref_corners3d, fig=fig, color=(0, 1, 0), cls=ref_scores, max_num=100)
        else:
            for k in range(ref_labels.min(), ref_labels.max() + 1):
                cur_color = tuple(box_colormap[k % len(box_colormap)])
                mask = (ref_labels == k)
                fig = draw_corners3d(ref_corners3d[mask], fig=fig, color=cur_color, cls=ref_scores[mask], max_num=100)
    mlab.view(azimuth=-179, elevation=54.0, distance=104.0, roll=90.0)
    return fig