def build(self):
# Builds the first part of the graph shown here: input placeholders,
# feature extractors, optional path drop on the two branches, and ROI
# crops of the BEV and image feature maps for every anchor.
# Setup input placeholders
# These only reserve slots for the inputs that are fed in later.
self._set_up_input_pls()
# Setup feature extractors
# Produces the feature maps for the inputs.
self._set_up_feature_extractors()
# proposal_input
# Per the original note: the BEV and image feature maps after a 1x1
# convolution (the bottlenecks are built elsewhere - not visible here).
bev_proposal_input = self.bev_bottleneck
img_proposal_input = self.img_bottleneck
# Divisor used by the 'mean' fusion; 2.0 averages the two branches.
fusion_mean_div_factor = 2.0
# If both img and bev probabilities are set to 1.0, don't do
# path drop.
# Original note: train=0.9, test/val=1.0 (config values not visible here).
# Original note: "true!" - presumably this branch is taken in training.
if not (self._path_drop_probabilities[0] ==
self._path_drop_probabilities[1] == 1.0):
with tf.variable_scope('rpn_path_drop'):
# Draw 3 random values from a uniform distribution between 0.0 and 1.0.
random_values = tf.random_uniform(shape=[3],
minval=0.0,
maxval=1.0)
# Original note: both probabilities are 0.9 (not visible here).
# Each returned mask is either 0.0 or 1.0.
img_mask, bev_mask = self.create_path_drop_masks(
self._path_drop_probabilities[0],
self._path_drop_probabilities[1],
random_values)
# Gate each branch: a mask of 1 passes the features through unchanged,
# a mask of 0 zeroes them out.
img_proposal_input = tf.multiply(img_proposal_input,
img_mask)
bev_proposal_input = tf.multiply(bev_proposal_input,
bev_mask)
self.img_path_drop_mask = img_mask
self.bev_path_drop_mask = bev_mask
# Overwrite the division factor
# With path drop active the divisor is the number of surviving branches.
fusion_mean_div_factor = img_mask + bev_mask
# Crop the BEV and image feature maps.
with tf.variable_scope('proposal_roi_pooling'):
with tf.variable_scope('box_indices'):
# The bare string below is the original author's inline note
# ("a bit complicated"); it has no effect at runtime.
'''有点复杂'''
def get_box_indices(boxes):
# Static shape as a Python list, e.g. [1, 2, 3, ...].
proposals_shape = boxes.get_shape().as_list()
# Fall back to the dynamic shape when any static dimension is unknown.
if any(dim is None for dim in proposals_shape):
# Original note: "false" - presumably this branch was not taken
# when the author traced it.
proposals_shape = tf.shape(boxes)
# Matrix of ones over the first two dimensions of `boxes`, scaled
# row-wise by the row index and flattened: every entry of row i
# becomes i.
ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
multiplier = tf.expand_dims(
tf.range(start=0, limit=proposals_shape[0]), 1)
return tf.reshape(ones_mat * multiplier, [-1])
# Add a leading dimension of size 1 to the BEV anchors. (The original
# note guessed the anchor shape as [None, 5] - unconfirmed.)
bev_boxes_norm_batches = tf.expand_dims(
self._bev_anchors_norm_pl, axis=0)
# These should be all 0's since there is only 1 image
tf_box_indices = get_box_indices(bev_boxes_norm_batches)
# Do ROI Pooling on BEV
# Original note, for comparison with a different API:
# tf.image.crop_to_bounding_box(image, offset_height, offset_width, target_height, target_width)
# crop_and_resize returns [num_boxes, crop_height, crop_width, depth].
# The bare string below is the original author's inline note ("in the
# network, a resize happens before the two kinds of data are fused").
'''在网络结构中,融合两种数据之前会有一个resize'''
# The resize brings both inputs to a common size so they can be fused
# later. Original note: the output is 6x6 (the value of
# _proposal_roi_crop_size is not visible here).
bev_proposal_rois = tf.image.crop_and_resize(
bev_proposal_input,
self._bev_anchors_norm_pl,
tf_box_indices,
self._proposal_roi_crop_size)
# Do ROI Pooling on image
img_proposal_rois = tf.image.crop_and_resize(
img_proposal_input,
self._img_anchors_norm_pl,
tf_box_indices,
self._proposal_roi_crop_size)
self.create_path_drop_masks: the main job of this function is to decide the values of bev_mask and img_mask. To be honest, I do not yet understand why this is done.
def create_path_drop_masks(self,
                           p_img,
                           p_bev,
                           random_values):
    """Decide which of the image / BEV branches survive path drop.

    Three coin flips are consumed. The first two decide independently
    whether the image branch and the BEV branch are kept. If at least one
    branch survives, those two results are returned unchanged. If both
    were dropped, the third flip revives exactly one of them: the image
    branch when the flip is above 0.5, the BEV branch otherwise.

    Args:
        p_img: A tensor of float32, probability of keeping image branch
        p_bev: A tensor of float32, probability of keeping bev branch
        random_values: A tensor of float32 of shape [3], the results
            of coin flips, values should range from 0.0 - 1.0.

    Returns:
        final_img_mask: A scalar tensor that is either 1.0 (keep the
            image branch) or 0.0 (drop it).
        final_bev_mask: A scalar tensor that is either 1.0 (keep the
            BEV branch) or 0.0 (drop it).
    """

    def _keep():
        return tf.constant(1.0)

    def _kill():
        return tf.constant(0.0)

    def _flip(condition):
        # 1.0 when `condition` holds, 0.0 otherwise.
        return tf.case([(condition, _keep)], default=_kill)

    # First two flips: keep a branch when its sample falls below the
    # branch's keep probability.
    img_first = _flip(tf.less(random_values[0], p_img))
    bev_first = _flip(tf.less(random_values[1], p_bev))

    # 1.0 when at least one branch survived the first round, else 0.0.
    img_alive = tf.cast(img_first, dtype=tf.bool)
    bev_alive = tf.cast(bev_first, dtype=tf.bool)
    any_alive = tf.cast(tf.logical_or(img_alive, bev_alive),
                        dtype=tf.float32)

    # Fallback for the case where both branches were dropped: the two
    # conditions are complementary, so exactly one branch is revived.
    img_fallback = _flip(tf.greater(random_values[2], 0.5))
    bev_fallback = _flip(tf.less_equal(random_values[2], 0.5))

    # tf.case needs callables, hence the lambdas around the tensors.
    final_img_mask = tf.case(
        [(tf.equal(any_alive, 1), lambda: img_first)],
        default=lambda: img_fallback)
    final_bev_mask = tf.case(
        [(tf.equal(any_alive, 1), lambda: bev_first)],
        default=lambda: bev_fallback)

    return final_img_mask, final_bev_mask
# Fuse the BEV and image ROI features.
with tf.variable_scope('proposal_roi_fusion'):
rpn_fusion_out = None
# Mean fusion
if self._fusion_method == 'mean':
tf_features_sum = tf.add(bev_proposal_rois, img_proposal_rois)
# Average: the divisor is 2.0 by default, or the sum of the two
# path-drop masks when path drop is active.
rpn_fusion_out = tf.divide(tf_features_sum,
fusion_mean_div_factor)
elif self._fusion_method == 'concat':
# Concatenate the two feature sets along axis 3.
rpn_fusion_out = tf.concat(
[bev_proposal_rois, img_proposal_rois], axis=3)
else:
raise ValueError('Invalid fusion method', self._fusion_method)
# TODO: move this section into an separate AnchorPredictor class
with tf.variable_scope('anchor_predictor', 'ap', [rpn_fusion_out]):
# The fused features are the input to both heads.
tensor_in = rpn_fusion_out
# Parse rpn layers config
layers_config = self._config.layers_config.rpn_config
# Original note: 0.0005 (config value not visible here).
l2_weight_decay = layers_config.l2_weight_decay
if l2_weight_decay > 0:
# L2 regularization on the conv weights.
weights_regularizer = slim.l2_regularizer(l2_weight_decay)
else:
weights_regularizer = None
with slim.arg_scope([slim.conv2d],
weights_regularizer=weights_regularizer):
# Use conv2d instead of fully_connected layers.
# The kernel size equals the ROI crop size and padding is VALID, so
# the convolution acts like a fully connected layer over each crop.
# Original note: 256 outputs, 6x6 kernel (config values not visible
# here).
cls_fc6 = slim.conv2d(tensor_in,
layers_config.cls_fc6,
self._proposal_roi_crop_size,
padding='VALID',
scope='cls_fc6')
# Original note: keep_prob is 0.5 (config value not visible here).
cls_fc6_drop = slim.dropout(cls_fc6,
layers_config.keep_prob,
is_training=self._is_training,
scope='cls_fc6_drop')
# Original note: 256 outputs (config value not visible here).
cls_fc7 = slim.conv2d(cls_fc6_drop,
layers_config.cls_fc7,
[1, 1],
scope='cls_fc7')
cls_fc7_drop = slim.dropout(cls_fc7,
layers_config.keep_prob,
is_training=self._is_training,
scope='cls_fc7_drop')
# 2 outputs for classification; no activation on this layer.
cls_fc8 = slim.conv2d(cls_fc7_drop,
2,
[1, 1],
activation_fn=None,
scope='cls_fc8')
# Remove the size-1 dimensions at axes 1 and 2.
objectness = tf.squeeze(
cls_fc8, [1, 2],
name='cls_fc8/squeezed')
# Use conv2d instead of fully_connected layers.
# Regression head, same structure as the classification head above.
# Original note: 256 outputs, 6x6 kernel (config values not visible
# here).
reg_fc6 = slim.conv2d(tensor_in,
layers_config.reg_fc6,
self._proposal_roi_crop_size,
padding='VALID',
scope='reg_fc6')
# Original note: dropout with keep_prob 0.5 (config value not visible
# here).
reg_fc6_drop = slim.dropout(reg_fc6,
layers_config.keep_prob,
is_training=self._is_training,
scope='reg_fc6_drop')
# Original note: 256 outputs (config value not visible here).
reg_fc7 = slim.conv2d(reg_fc6_drop,
layers_config.reg_fc7,
[1, 1],
scope='reg_fc7')
reg_fc7_drop = slim.dropout(reg_fc7,
layers_config.keep_prob,
is_training=self._is_training,
scope='reg_fc7_drop')
#∆t x , ∆t y , ∆t z , ∆d x , ∆d y , ∆d z
# Original note: the 6 regression values are the centre-point offsets
# and the length/width/height offsets. No activation on this layer.
reg_fc8 = slim.conv2d(reg_fc7_drop,
6,
[1, 1],
activation_fn=None,
scope='reg_fc8')
# Remove the size-1 dimensions at axes 1 and 2.
offsets = tf.squeeze(
reg_fc8, [1, 2],
name='reg_fc8/squeezed')