源码链接: PCDet。
根据论文中给出的网络结构图,网络主要分为3个部分。
网络的主体框架在 PCDet-master\pcdet\models\detectors\second_net.py
中,可以清晰地看到网络的主体结构。
class SECONDNet(Detector3D):
    """SECOND detector: mean-VFE -> sparse 3D backbone -> 2D RPN head."""

    def __init__(self, num_class, dataset):
        # `dataset` carries the full input pipeline, including the voxel generator.
        super().__init__(num_class, dataset)
        # grid_size is (x, y, z); reverse to (z, y, x) and extend z by 1 for spconv.
        self.sparse_shape = dataset.voxel_generator.grid_size[::-1] + [1, 0, 0]
        self.build_networks(cfg.MODEL)  # submodules are created inside Detector3D

    def forward_rpn(self, voxels, num_points, coordinates, batch_size, voxel_centers, **kwargs):
        # Collapse the padded per-point features of each voxel into a single
        # feature vector per voxel.
        voxel_features = self.vfe(
            features=voxels,
            num_voxels=num_points,
            coords=coordinates,
        )
        # Combine features with their integer voxel coordinates into a sparse
        # 3D tensor — the natural 3D form of the data.
        sp_tensor = spconv.SparseConvTensor(
            features=voxel_features,
            indices=coordinates,
            spatial_shape=self.sparse_shape,
            batch_size=batch_size,
        )
        # Sparse 3D backbone (downsampling).
        backbone_out = self.rpn_net(sp_tensor, voxel_centers=voxel_centers)
        # RPN head on the dense BEV feature map; gt_boxes are only present
        # during training.
        head_out = self.rpn_head(
            backbone_out['spatial_features'],
            gt_boxes=kwargs.get('gt_boxes', None),
        )
        head_out.update(backbone_out)
        return {
            'rpn_cls_preds': head_out['cls_preds'],
            'rpn_box_preds': head_out['box_preds'],
            'rpn_dir_cls_preds': head_out.get('dir_cls_preds', None),
            'anchors': head_out['anchors'],
        }

    def forward(self, input_dict):
        rpn_ret_dict = self.forward_rpn(**input_dict)
        if not self.training:
            # Inference path: decode predictions and gather recall statistics.
            return self.predict_boxes(rpn_ret_dict, rcnn_ret_dict=None, input_dict=input_dict)
        loss, tb_dict, disp_dict = self.get_training_loss()
        return {'loss': loss}, tb_dict, disp_dict

    def get_training_loss(self):
        disp_dict = {}  # intentionally empty for this detector
        loss_rpn, tb_dict = self.rpn_head.get_loss()
        tb_dict = {
            'loss_rpn': loss_rpn.item(),
            **tb_dict,
        }
        return loss_rpn, tb_dict, disp_dict
MeanVoxelFeatureExtractor
初步得到每个体素的特征:对一个体素块中所有点的坐标和强度求均值。
class MeanVoxelFeatureExtractor(VoxelFeatureExtractor):
    """Voxel feature extractor that averages the raw point features
    (coordinates + intensity) over all points inside each voxel."""

    def __init__(self, **kwargs):
        super().__init__()

    def get_output_feature_dim(self):
        # One output channel per raw point feature (4 in the default config).
        return cfg.DATA_CONFIG.NUM_POINT_FEATURES['use']

    def forward(self, features, num_voxels, **kwargs):
        """
        :param features: (N, max_points_of_each_voxel, 3 + C) padded per-voxel
            point features
        :param num_voxels: (N,) number of real (non-padding) points per voxel
        :return: (N, 3 + C) contiguous per-voxel mean features
        """
        # Padding rows are zero, so summing over the point axis and dividing by
        # the real point count yields the mean over the actual points.
        counts = num_voxels.type_as(features).view(-1, 1)
        mean_features = features.sum(dim=1) / counts
        return mean_features.contiguous()
BackBone8x
与论文中结构有一点点的不同,大体上相同
class BackBone8x(nn.Module):
    """Sparse 3D backbone: downsamples the voxel grid 8x in (y, x), compresses
    the z axis, and folds z into channels to produce a dense BEV feature map.
    Differs slightly from the structure drawn in the paper."""

    def __init__(self, input_channels):
        super().__init__()
        # Pre-bind default BatchNorm1d hyper-parameters (spconv normalizes over
        # the per-voxel feature dimension, hence 1d).
        norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01)
        # Input stage: one submanifold conv (keeps the sparsity pattern).
        self.conv_input = spconv.SparseSequential(
            spconv.SubMConv3d(input_channels, 16, 3, padding=1, bias=False, indice_key='subm1'),
            norm_fn(16),
            nn.ReLU(),
        )
        # NOTE(review): `post_act_block` is not defined in this excerpt —
        # presumably a helper on this class building conv+BN+ReLU, with
        # `conv_type` selecting submanifold vs. regular sparse conv; confirm
        # against the full source.
        block = self.post_act_block
        # Another submanifold conv.
        # [1600, 1408, 41]
        self.conv1 = spconv.SparseSequential(
            block(16, 16, 3, norm_fn=norm_fn, padding=1, indice_key='subm1'),
        )
        # stride-2 sparse conv + two submanifold convs
        self.conv2 = spconv.SparseSequential(
            # [1600, 1408, 41] -> [800, 704, 21]
            block(16, 32, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv2', conv_type='spconv'),
            block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'),
            block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'),
        )
        # stride-2 sparse conv + two submanifold convs
        self.conv3 = spconv.SparseSequential(
            # [800, 704, 21] -> [400, 352, 11]
            block(32, 64, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv3', conv_type='spconv'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'),
        )
        # stride-2 sparse conv + two submanifold convs
        self.conv4 = spconv.SparseSequential(
            # [400, 352, 11] -> [200, 176, 5]; note the asymmetric z padding
            block(64, 64, 3, norm_fn=norm_fn, stride=2, padding=(0, 1, 1), indice_key='spconv4', conv_type='spconv'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'),
        )
        # z padding of the final conv depends on the configured voxel height.
        last_pad = 0 if cfg.DATA_CONFIG.VOXEL_GENERATOR.VOXEL_SIZE[-1] in [0.1, 0.2] else (1, 0, 0)
        # Sparse conv with stride (2, 1, 1): compresses only the z axis.
        self.conv_out = spconv.SparseSequential(
            # [200, 176, 5] -> [200, 176, 2]
            spconv.SparseConv3d(64, 128, (3, 1, 1), stride=(2, 1, 1), padding=last_pad,
                                bias=False, indice_key='spconv_down2'),
            norm_fn(128),
            nn.ReLU(),
        )

    def forward(self, input_sp_tensor, **kwargs):
        """
        :param input_sp_tensor: spconv.SparseConvTensor holding per-voxel
            features (N, C) at coordinates (N, 4) = [batch_idx, z, y, x];
            spatial_shape is (z_size, y_size, x_size)
        :return: dict with 'spatial_features' — a dense BEV map (N, C*D, H, W)
            obtained by folding the z axis into the channel axis
        """
        x = self.conv_input(input_sp_tensor)  # -> 16 channels, [41, 1408, 1600]
        x_conv1 = self.conv1(x)               # 16 -> 16 channels, [41, 1408, 1600]
        x_conv2 = self.conv2(x_conv1)         # 16 -> 32 channels, [21, 704, 800]
        x_conv3 = self.conv3(x_conv2)         # 32 -> 64 channels, [11, 352, 400]
        x_conv4 = self.conv4(x_conv3)         # 64 -> 64 channels, [5, 176, 200]
        # Unlike dense conv2d outputs, a spconv tensor keeps batch_size,
        # features and spatial shape separately (dict/tuple-like); .dense()
        # below combines them into one dense tensor.
        # Mind the pad vs. no-pad differences among the convs above.
        # for detection head
        # [5, 176, 200] -> [2, 176, 200]
        out = self.conv_out(x_conv4)          # 64 -> 128 channels, [2, 176, 200]
        spatial_features = out.dense()        # dense tensor, e.g. [N, 128, 2, 176, 200]
        N, C, D, H, W = spatial_features.shape
        spatial_features = spatial_features.view(N, C * D, H, W)  # fold z into channels -> 2D BEV features
        ret = {'spatial_features': spatial_features}
        return ret
RPN_head
这一部分与论文中介绍的也有微小的差异:论文中 RPN_head 是三个部分的拼接,但在源码中缺少了一部分,是两个部分的拼接。
class AnchorHead(nn.Module):
    """Base RPN head: builds per-class anchor generators, the box coder and
    target assigner, pre-generates anchors for the output feature map, and
    computes the RPN classification / regression / direction losses."""

    def __init__(self, grid_size, anchor_target_cfg):
        super().__init__()
        anchor_cfg = anchor_target_cfg.ANCHOR_GENERATOR
        anchor_generators = []
        self.num_class = len(cfg.CLASS_NAMES)
        # Build one dedicated anchor generator per class and collect them.
        for cur_name in cfg.CLASS_NAMES:
            cur_cfg = None
            # Find the anchor config entry matching this class name.
            for a_cfg in anchor_cfg:
                if a_cfg['class_name'] == cur_name:
                    cur_cfg = a_cfg
                    break
            assert cur_cfg is not None, 'Not found anchor config: %s' % cur_name
            # Instantiate the generator with the class-specific parameters.
            anchor_generator = AnchorGeneratorRange(
                anchor_ranges=cur_cfg['anchor_range'],
                sizes=cur_cfg['sizes'],
                rotations=cur_cfg['rotations'],
                class_name=cur_cfg['class_name'],
                match_threshold=cur_cfg['matched_threshold'],
                unmatch_threshold=cur_cfg['unmatched_threshold']
            )
            anchor_generators.append(anchor_generator)
        # Box coder (e.g. ResidualCoder): encodes gt boxes into regression targets.
        self.box_coder = getattr(box_coder_utils, anchor_target_cfg.BOX_CODER)()
        # The target assigner bundles the anchor generators and target generation.
        self.target_assigner = TargetAssigner(
            anchor_generators=anchor_generators,
            pos_fraction=anchor_target_cfg.SAMPLE_POS_FRACTION,
            sample_size=anchor_target_cfg.SAMPLE_SIZE,
            region_similarity_fn_name=anchor_target_cfg.REGION_SIMILARITY_FN,
            box_coder=self.box_coder
        )
        # Each location carries several anchors (different sizes / rotations).
        self.num_anchors_per_location = self.target_assigner.num_anchors_per_location
        self.box_code_size = self.box_coder.code_size  # 7
        # grid_size is the raw voxel grid; dividing by DOWNSAMPLED_FACTOR gives
        # the network output resolution, which is also where anchors live.
        feature_map_size = grid_size[:2] // anchor_target_cfg.DOWNSAMPLED_FACTOR  # [176, 200]
        feature_map_size = [*feature_map_size, 1][::-1]  # [1, 200, 176]
        # ret: {'anchors': anchors,
        #       'matched_thresholds': matched_thresholds,
        #       'unmatched_thresholds': unmatched_thresholds}
        ret = self.target_assigner.generate_anchors(feature_map_size)
        # Same content as `ret`, but grouped per class.
        anchors_dict = self.target_assigner.generate_anchors_dict(feature_map_size)
        # Flatten to (num_anchors, 7) and cache so anchors are built only once.
        anchors = ret['anchors'].reshape([-1, 7])
        self.anchor_cache = {
            'anchors': anchors,
            'anchors_dict': anchors_dict,
        }
        # Filled by the subclass forward(); read back by get_loss().
        self.forward_ret_dict = None
        # Initialize the three loss functions.
        self.build_losses(cfg.MODEL.LOSSES)

    def build_losses(self, losses_cfg):
        """Instantiate classification, localization and direction losses."""
        # loss function definition
        self.cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(alpha=0.25, gamma=2.0)
        code_weights = losses_cfg.LOSS_WEIGHTS['code_weights']
        # bin-based regression only weights a subset of the code dimensions.
        rpn_code_weights = code_weights[3:7] if losses_cfg.RPN_REG_LOSS == 'bin-based' else code_weights
        self.reg_loss_func = loss_utils.WeightedSmoothL1LocalizationLoss(sigma=3.0, code_weights=rpn_code_weights)
        self.dir_loss_func = loss_utils.WeightedSoftmaxClassificationLoss()

    def assign_targets(self, gt_boxes):
        """
        :param gt_boxes: (B, N, 8) padded gt boxes; [..., :7] box parameters,
            [..., 7] 1-based class label (all-zero rows are padding)
        :return: dict of numpy arrays, each stacked over the batch dimension
        """
        gt_boxes = gt_boxes.cpu().numpy()
        batch_size = gt_boxes.shape[0]
        # class labels
        gt_classes = gt_boxes[:, :, 7]
        # box parameters
        gt_boxes = gt_boxes[:, :, :7]
        targets_dict_list = []
        for k in range(batch_size):
            cur_gt = gt_boxes[k]
            # Strip all-zero padding rows from the end of this sample.
            cnt = cur_gt.__len__() - 1
            while cnt > 0 and cur_gt[cnt].sum() == 0:
                cnt -= 1
            cur_gt = cur_gt[:cnt + 1]
            cur_gt_classes = gt_classes[k][:cnt + 1]
            cur_gt_names = np.array(cfg.CLASS_NAMES)[cur_gt_classes.astype(np.int32) - 1]
            # Per-sample target dict (per-class anchors matched against gt).
            cur_target_dict = self.target_assigner.assign_v2(
                anchors_dict=self.anchor_cache['anchors_dict'],  # per-class anchors and thresholds
                gt_boxes=cur_gt,
                gt_classes=cur_gt_classes,
                gt_names=cur_gt_names
            )
            targets_dict_list.append(cur_target_dict)
        # targets_dict_list is a list of per-sample dicts; merge it into one
        # dict of batched arrays (drop the per-class split, keep whole-net targets).
        targets_dict = {}
        for key in targets_dict_list[0].keys():
            val = np.stack([x[key] for x in targets_dict_list], axis=0)
            targets_dict[key] = val
        return targets_dict

    @staticmethod
    def add_sin_difference(boxes1, boxes2, dim=6):
        """Replace the angle at index `dim` by sin(a)cos(b) / cos(a)sin(b) so a
        smooth-L1 on the difference behaves like sin(a - b)."""
        assert dim != -1
        rad_pred_encoding = torch.sin(boxes1[..., dim:dim+1]) * torch.cos(boxes2[..., dim:dim+1])
        rad_tg_encoding = torch.cos(boxes1[..., dim:dim+1]) * torch.sin(boxes2[..., dim:dim+1])
        boxes1 = torch.cat([boxes1[..., :dim], rad_pred_encoding, boxes1[..., dim+1:]], dim=-1)
        boxes2 = torch.cat([boxes2[..., :dim], rad_tg_encoding, boxes2[..., dim+1:]], dim=-1)
        return boxes1, boxes2

    @staticmethod
    def get_direction_target(anchors, reg_targets, one_hot=True, dir_offset=0, num_bins=2):
        """Derive direction-bin classification targets from regression targets."""
        batch_size = reg_targets.shape[0]
        anchors = anchors.view(batch_size, -1, anchors.shape[-1])
        # Recover the absolute gt heading angle.
        rot_gt = reg_targets[..., 6] + anchors[..., 6]
        # Limit to a fixed range [0, 2*pi).
        offset_rot = common_utils.limit_period_torch(rot_gt - dir_offset, 0, 2 * np.pi)
        # Quantize the heading into a bin index (0 or 1 for num_bins=2).
        dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long()
        # Clamp into the valid bin range.
        dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1)
        if one_hot:
            dir_targets = torch.zeros(*list(dir_cls_targets.shape), num_bins, dtype=anchors.dtype,
                                      device=dir_cls_targets.device)
            dir_targets.scatter_(-1, dir_cls_targets.unsqueeze(dim=-1).long(), 1.0)
            dir_cls_targets = dir_targets
        return dir_cls_targets

    def get_loss(self, forward_ret_dict=None):
        """Compute the RPN loss (cls + loc [+ dir]) from the forward outputs.

        :param forward_ret_dict: outputs of the last forward pass; defaults to
            the dict cached in self.forward_ret_dict
        :return: (total rpn loss, dict of scalar loss terms for tensorboard)
        """
        loss_cfgs = cfg.MODEL.LOSSES
        # forward_ret_dict holds the results computed by forward().
        forward_ret_dict = self.forward_ret_dict if forward_ret_dict is None else forward_ret_dict
        anchors = forward_ret_dict['anchors']
        box_preds = forward_ret_dict['box_preds']
        cls_preds = forward_ret_dict['cls_preds']
        box_dir_cls_preds = forward_ret_dict['dir_cls_preds']
        box_cls_labels = forward_ret_dict['box_cls_labels']
        box_reg_targets = forward_ret_dict['box_reg_targets']
        batch_size = int(box_preds.shape[0])
        anchors = anchors.view(1, -1, anchors.shape[-1]).repeat(batch_size, 1, 1)
        # rpn head losses
        # Per-anchor loss weights: label < 0 ignored, 0 background, > 0 foreground.
        cared = box_cls_labels >= 0  # [batch_size, num_anchors]
        positives = box_cls_labels > 0  # [batch_size, num_anchors]
        negatives = box_cls_labels == 0  # [batch_size, num_anchors]
        negative_cls_weights = negatives * 1.0  # [batch_size, num_anchors]
        cls_weights = (negative_cls_weights + 1.0 * positives).float()  # [batch_size, num_anchors]
        reg_weights = positives.float()  # only anchors with box_cls_labels > 0 contribute; the rest get weight 0
        # Normalize both weights by the number of positives per sample.
        pos_normalizer = positives.sum(1, keepdim=True).float()
        reg_weights /= torch.clamp(pos_normalizer, min=1.0)
        cls_weights /= torch.clamp(pos_normalizer, min=1.0)
        cls_targets = box_cls_labels * cared.type_as(box_cls_labels)
        cls_targets = cls_targets.unsqueeze(dim=-1)
        num_class = self.num_class
        cls_targets = cls_targets.squeeze(dim=-1)
        one_hot_targets = torch.zeros(
            *list(cls_targets.shape), num_class + 1, dtype=box_preds.dtype, device=cls_targets.device
        )
        one_hot_targets.scatter_(-1, cls_targets.unsqueeze(dim=-1).long(), 1.0)
        if cfg.MODEL.RPN.RPN_HEAD.ARGS['encode_background_as_zeros']:
            # Background carries label 0, i.e. the first one-hot column; drop it
            # so the network predicts only the real classes.
            cls_preds = cls_preds.view(batch_size, -1, num_class)
            one_hot_targets = one_hot_targets[..., 1:]
        else:
            cls_preds = cls_preds.view(batch_size, -1, num_class + 1)
        loss_weights_dict = loss_cfgs.LOSS_WEIGHTS
        cls_loss = self.cls_loss_func(cls_preds, one_hot_targets, weights=cls_weights)  # [batch_size, num_anchors, num_classes]
        cls_loss_reduced = cls_loss.sum() / batch_size
        # Scale the cls loss by its share of the total loss.
        cls_loss_reduced = cls_loss_reduced * loss_weights_dict['rpn_cls_weight']
        box_preds = box_preds.view(batch_size, -1, box_preds.shape[-1] // self.num_anchors_per_location)
        if loss_cfgs.RPN_REG_LOSS == 'smooth-l1':
            # sin(a - b) = sin(a)cos(b) - cos(a)sin(b)
            box_preds_sin, reg_targets_sin = self.add_sin_difference(box_preds, box_reg_targets)
            loc_loss = self.reg_loss_func(box_preds_sin, reg_targets_sin, weights=reg_weights)  # [batch_size, num_anchors]
            loc_loss_reduced = loc_loss.sum() / batch_size
        else:
            raise NotImplementedError
        # Scale the loc loss by its share of the total loss.
        loc_loss_reduced = loc_loss_reduced * loss_weights_dict['rpn_loc_weight']
        rpn_loss = loc_loss_reduced + cls_loss_reduced
        tb_dict = {
            'rpn_loss_loc': loc_loss_reduced.item(),
            'rpn_loss_cls': cls_loss_reduced.item()
        }
        if box_dir_cls_preds is not None:
            dir_targets = self.get_direction_target(
                anchors, box_reg_targets,
                dir_offset=cfg.MODEL.RPN.RPN_HEAD.ARGS['dir_offset'],
                num_bins=cfg.MODEL.RPN.RPN_HEAD.ARGS['num_direction_bins']
            )
            dir_logits = box_dir_cls_preds.view(batch_size, -1, cfg.MODEL.RPN.RPN_HEAD.ARGS['num_direction_bins'])
            # Again only positive anchors contribute, normalized per sample.
            weights = positives.type_as(dir_logits)
            weights /= torch.clamp(weights.sum(-1, keepdim=True), min=1.0)
            # Direction classification loss.
            dir_loss = self.dir_loss_func(dir_logits, dir_targets, weights=weights)
            dir_loss = dir_loss.sum() / batch_size
            dir_loss = dir_loss * loss_weights_dict['rpn_dir_weight']
            rpn_loss += dir_loss
            tb_dict['rpn_loss_dir'] = dir_loss.item()
        tb_dict['rpn_loss'] = rpn_loss.item()
        # rpn_loss: total loss; tb_dict: table of scalar loss terms.
        return rpn_loss, tb_dict
class RPNV2(AnchorHead):
    """2D RPN over the BEV feature map: a stack of downsampling conv blocks,
    matching upsampling (deconv) blocks whose outputs are concatenated, and
    1x1 conv heads producing class / box / direction predictions.

    Fix: the original snippet declared ``class RPNV2( ):`` with no base class,
    which would break the ``super().__init__(grid_size=..., anchor_target_cfg=...)``
    call below. It must inherit AnchorHead — that signature matches
    AnchorHead.__init__, and self.num_anchors_per_location, self.box_code_size,
    self.anchor_cache, self.assign_targets and self.forward_ret_dict used here
    are all defined there.
    """

    def __init__(self, num_class, args, anchor_target_cfg, grid_size, **kwargs):
        # Initializes AnchorHead: anchor generation, target assigner, losses.
        super().__init__(grid_size=grid_size, anchor_target_cfg=anchor_target_cfg)
        self._use_direction_classifier = args['use_direction_classifier']  # True
        self._concat_input = args['concat_input']  # False
        assert len(args['layer_strides']) == len(args['layer_nums'])
        assert len(args['num_filters']) == len(args['layer_nums'])
        assert len(args['num_upsample_filters']) == len(args['layer_nums'])
        # partial pre-binds constructor arguments so later calls omit them.
        if args['use_norm']:
            BatchNorm2d = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.01)
            Conv2d = partial(nn.Conv2d, bias=False)
            ConvTranspose2d = partial(nn.ConvTranspose2d, bias=False)
        else:
            BatchNorm2d = Empty
            Conv2d = partial(nn.Conv2d, bias=True)
            ConvTranspose2d = partial(nn.ConvTranspose2d, bias=True)
        in_filters = [args['num_input_features'], *args['num_filters'][:-1]]
        blocks = []
        deblocks = []
        # Example ARGS from the cfg:
        #   num_input_features: 256, layer_nums: [5, 5], layer_strides: [1, 2],
        #   num_filters: [128, 256], upsample_strides: [1, 2],
        #   num_upsample_filters: [256, 256], encode_background_as_zeros: True,
        #   use_direction_classifier: True, num_direction_bins: 2,
        #   dir_offset: 0.78539, dir_limit_offset: 0.0
        for i, layer_num in enumerate(args['layer_nums']):
            # Entry conv for this stage (stride 1 for stage 0, stride 2 for stage 1
            # with the example cfg above).
            block = Sequential(
                nn.ZeroPad2d(1),
                Conv2d(in_filters[i], args['num_filters'][i], 3, stride=args['layer_strides'][i]),
                BatchNorm2d(args['num_filters'][i]),
                nn.ReLU(),
            )
            # layer_num additional 3x3 convs at the same resolution.
            for j in range(layer_num):
                block.add(Conv2d(args['num_filters'][i], args['num_filters'][i], 3, padding=1))
                block.add(BatchNorm2d(args['num_filters'][i]))
                block.add(nn.ReLU())
            blocks.append(block)
            # Matching deconv: upsamples this stage's output back to a common
            # resolution (stride equal to the stage's downsampling factor).
            deblock = Sequential(
                ConvTranspose2d(
                    args['num_filters'][i], args['num_upsample_filters'][i], args['upsample_strides'][i],
                    stride=args['upsample_strides'][i]
                ),
                BatchNorm2d(args['num_upsample_filters'][i]),
                nn.ReLU(),
            )
            deblocks.append(deblock)
        # Channels after concatenating all upsampled stage outputs (e.g. 256+256).
        c_in = sum(args['num_upsample_filters'])
        if self._concat_input:  # False in the current cfg
            c_in += args['num_input_features']
        # Optional extra deblock — not present in the current cfg.
        if len(args['upsample_strides']) > len(args['num_filters']):
            deblock = Sequential(
                ConvTranspose2d(c_in, c_in, args['upsample_strides'][-1], stride=args['upsample_strides'][-1]),
                BatchNorm2d(c_in),
                nn.ReLU(),
            )
            deblocks.append(deblock)
        self.blocks = nn.ModuleList(blocks)
        self.deblocks = nn.ModuleList(deblocks)
        # Every location carries num_anchors_per_location anchors over num_class classes.
        if args['encode_background_as_zeros']:
            num_cls = self.num_anchors_per_location * num_class
        else:
            num_cls = self.num_anchors_per_location * (num_class + 1)
        # 1x1 conv head producing the classification logits.
        self.conv_cls = nn.Conv2d(c_in, num_cls, 1)
        # 1x1 conv head producing the box regression outputs.
        reg_channels = self.num_anchors_per_location * self.box_code_size
        self.conv_box = nn.Conv2d(c_in, reg_channels, 1)
        if args['use_direction_classifier']:
            self.conv_dir_cls = nn.Conv2d(c_in, self.num_anchors_per_location * args['num_direction_bins'], 1)
        # Final initialization step: weight init.
        self.init_weights()

    def init_weights(self):
        # Focal-loss style bias init: initial foreground probability ~= pi.
        pi = 0.01
        nn.init.constant_(self.conv_cls.bias, -np.log((1 - pi) / pi))

    def forward(self, x_in, bev=None, **kwargs):
        """
        :param x_in: dense BEV feature map (N, C, H, W) from the 3D backbone
        :param bev: unused here, kept for interface compatibility
        :return: dict with box/cls(/dir) predictions, anchors, intermediate
            spatial features, and (during training) assigned targets
        """
        ups = []
        x = x_in
        ret_dict = {}
        for i in range(len(self.blocks)):
            x = self.blocks[i](x)
            # Record this stage's output keyed by its effective stride.
            stride = int(x_in.shape[2] / x.shape[2])
            ret_dict['spatial_features_%dx' % stride] = x
            # Upsample each stage's output to a common resolution.
            ups.append(self.deblocks[i](x))
        if self._concat_input:
            ups.append(x_in)
        # Concatenate the (stride-1 and stride-2) stage outputs along channels.
        if len(ups) > 1:
            x = torch.cat(ups, dim=1)
        else:
            x = ups[0]
        if len(self.deblocks) > len(self.blocks):
            x = self.deblocks[-1](x)
        ret_dict['spatial_features_last'] = x
        # Prediction heads for box regression and classification.
        box_preds = self.conv_box(x)
        cls_preds = self.conv_cls(x)
        # [N, C, y(H), x(W)] -> [N, H, W, C]
        box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
        cls_preds = cls_preds.permute(0, 2, 3, 1).contiguous()
        ret_dict.update({
            'box_preds': box_preds,
            'cls_preds': cls_preds,
        })
        # Direction classification predictions.
        if self._use_direction_classifier:
            dir_cls_preds = self.conv_dir_cls(x)
            dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()
            ret_dict['dir_cls_preds'] = dir_cls_preds
        ret_dict['anchors'] = torch.from_numpy(self.anchor_cache['anchors']).cuda()
        if self.training:
            # Build the training targets the losses compare against.
            targets_dict = self.assign_targets(
                gt_boxes=kwargs['gt_boxes'],
            )
            ret_dict.update({
                'box_cls_labels': torch.from_numpy(targets_dict['labels']).cuda(),
                'box_reg_targets': torch.from_numpy(targets_dict['bbox_targets']).cuda(),
                'reg_src_targets': torch.from_numpy(targets_dict['bbox_src_targets']).cuda(),
                'reg_weights': torch.from_numpy(targets_dict['bbox_outside_weights']).cuda(),
            })
        # Cache for AnchorHead.get_loss().
        self.forward_ret_dict = ret_dict
        return ret_dict
计算损失是神经网络中比较重要的部分。
首先明确计算损失的四个要素:①预测值②标签值③权重④损失函数
在 RPN_head 的计算中,网络有三个输出:box_preds、cls_preds 和 dir_cls_preds;与之比较的标签值由 self.target_assigner 生成。损失函数则在 build_losses 中定义:
def build_losses(self, losses_cfg):
# loss function definition
self.cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(alpha=0.25, gamma=2.0)
code_weights = losses_cfg.LOSS_WEIGHTS['code_weights']
rpn_code_weights = code_weights[3:7] if losses_cfg.RPN_REG_LOSS == 'bin-based' else code_weights
self.reg_loss_func = loss_utils.WeightedSmoothL1LocalizationLoss(sigma=3.0, code_weights=rpn_code_weights)
self.dir_loss_func = loss_utils.WeightedSoftmaxClassificationLoss()
从名称上就可以看出来使用的损失函数,损失函数看源码比较麻烦,可以直接百度数学原理。
其实在整个网络中以上只是一小部分,其中调用到的类,调用到的函数几乎包含了文件夹里面的全部,都要慢慢看呐。
参考博客:point pillars.