AVOD-代码理解系列(三)

AVOD-代码理解

  1 从输入到数据融合之前

    def build(self):
        """Build the proposal-stage graph up to the per-anchor ROI crops.

        The part of the method shown here sets up the inputs and feature
        extractors, optionally applies path drop to the image / BEV
        feature maps, and crops both feature maps to a fixed size for every
        anchor so that they can be fused afterwards.
        """

        # Setup input placeholders
        # (reserves the graph inputs that are fed at run time)
        self._set_up_input_pls()

        # Setup feature extractors
        # (produces the feature maps for the inputs)
        self._set_up_feature_extractors()
        # Proposal inputs.
        # NOTE(review): presumably these are the BEV / image feature maps
        # after a 1x1 convolution bottleneck - confirm in
        # _set_up_feature_extractors.
        bev_proposal_input = self.bev_bottleneck
        img_proposal_input = self.img_bottleneck
        # Divisor used by the 'mean' fusion; 2.0 means a plain average of
        # the two branches.
        fusion_mean_div_factor = 2.0

        # If both img and bev probabilites are set to 1.0, don't do
        # path drop.
        # NOTE(review): per the original annotation the keep probabilities
        # are 0.9 for training and 1.0 for test/val, so this condition is
        # true during training - confirm against the config.
        if not (self._path_drop_probabilities[0] ==
                self._path_drop_probabilities[1] == 1.0):
            with tf.variable_scope('rpn_path_drop'):
                # Draw three random values from a uniform distribution over
                # the range 0.0 - 1.0 (the "coin flips").
                random_values = tf.random_uniform(shape=[3],
                                                  minval=0.0,
                                                  maxval=1.0)
                # Turn the keep probabilities and the coin flips into two
                # scalar masks; each mask is either 0.0 or 1.0.
                img_mask, bev_mask = self.create_path_drop_masks(
                    self._path_drop_probabilities[0],
                    self._path_drop_probabilities[1],
                    random_values)
                # Gate each branch: a mask of 1 passes the features through,
                # a mask of 0 zeroes them out.
                img_proposal_input = tf.multiply(img_proposal_input,
                                                 img_mask)

                bev_proposal_input = tf.multiply(bev_proposal_input,
                                                 bev_mask)

                self.img_path_drop_mask = img_mask
                self.bev_path_drop_mask = bev_mask

                # Overwrite the division factor
                # With path drop the divisor is the sum of the two masks,
                # i.e. the number of branches that were kept.
                fusion_mean_div_factor = img_mask + bev_mask
        # Crop the BEV and image feature maps for every anchor.
        with tf.variable_scope('proposal_roi_pooling'):

            with tf.variable_scope('box_indices'):
                # The bare string below is the original author's inline
                # note ("a bit complicated"); it has no runtime effect.
                '''有点复杂'''
                def get_box_indices(boxes):
                    # Returns a flat int32 vector with one entry per box,
                    # holding the index along the first (batch) dimension
                    # that the box belongs to.
                    # Static shape as a Python list.
                    proposals_shape = boxes.get_shape().as_list()
                    if any(dim is None for dim in proposals_shape):
                        # Some dimension is not known statically, so fall
                        # back to the dynamic shape tensor.
                        proposals_shape = tf.shape(boxes)
                    # Matrix of ones over the first two dimensions ...
                    ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                    # ... multiplied by a column [0, 1, ..., dim0 - 1] so
                    # that row i is filled with the value i.
                    multiplier = tf.expand_dims(
                        tf.range(start=0, limit=proposals_shape[0]), 1)
                    return tf.reshape(ones_mat * multiplier, [-1])
                # Add a leading dimension of size 1 in front of the anchors.
                bev_boxes_norm_batches = tf.expand_dims(
                    self._bev_anchors_norm_pl, axis=0)

                # These should be all 0's since there is only 1 image
                tf_box_indices = get_box_indices(bev_boxes_norm_batches)

            # Do ROI Pooling on BEV
            # tf.image.crop_and_resize returns a tensor of shape
            # [num_boxes, crop_height, crop_width, depth].
            # The bare string below is the original author's inline note
            # ("in the network architecture there is a resize before the two
            # kinds of data are fused"); it has no runtime effect.
            '''在网络结构中,融合两种数据之前会有一个resize'''
            # Both crops use the same self._proposal_roi_crop_size, so the
            # BEV and image ROIs end up with matching spatial size for the
            # later fusion.
            # NOTE(review): the original annotation says the crops are
            # 6x6 - confirm against the config.
            bev_proposal_rois = tf.image.crop_and_resize(
                bev_proposal_input,
                self._bev_anchors_norm_pl,
                tf_box_indices,
                self._proposal_roi_crop_size)
            # Do ROI Pooling on image
            img_proposal_rois = tf.image.crop_and_resize(
                img_proposal_input,
                self._img_anchors_norm_pl,
                tf_box_indices,
                self._proposal_roi_crop_size)

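self.create_path_drop_masks:这个函数的主要功能是根据三个随机数确定 img_mask 和 bev_mask(取值只会是 0 或 1),用来决定本次迭代是否保留图像分支和 BEV 分支的特征。前两个随机数分别与各自的保留概率比较;如果两路都被丢弃,则由第三个随机数在两路中二选一,保证至少保留一路。这样做的意义可以理解为类似 dropout 的正则化:训练时随机丢弃某一路输入,避免网络过度依赖单一数据来源(这是我的理解,仅供参考)。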

 def create_path_drop_masks(self,
                               p_img,
                               p_bev,
                               random_values):
        """Build the global keep/drop masks for the image and BEV branches.

        Three coin flips are consumed. The first is compared with `p_img`
        and the second with `p_bev` to get an initial keep/drop decision
        per branch. If at least one branch survives, those initial
        decisions are returned unchanged. If both were dropped, the third
        flip picks exactly one branch to keep (image when it is greater
        than 0.5, BEV otherwise), so the two masks are never both zero.

        Args:
            p_img: A tensor of float32, probability of keeping image branch
            p_bev: A tensor of float32, probability of keeping bev branch
            random_values: A tensor of float32 of shape [3], the results
                of coin flips, values should range from 0.0 - 1.0.

        Returns:
            final_img_mask: A scalar tensor mask holding either 1.0 or 0.0
                for the image branch.
            final_bev_mask: A scalar tensor mask holding either 1.0 or 0.0
                for the bev branch.
        """
        def _one():
            # Branch is kept.
            return tf.constant(1.0)

        def _zero():
            # Branch is dropped.
            return tf.constant(0.0)

        def _coin(condition):
            # 1.0 when `condition` holds, 0.0 otherwise.
            return tf.case([(condition, _one)], default=_zero)

        # First two flips: independent keep/drop decision per branch.
        img_first = _coin(tf.less(random_values[0], p_img))
        bev_first = _coin(tf.less(random_values[1], p_bev))

        # 1.0 when at least one branch survived the first two flips,
        # 0.0 when both were dropped.
        any_branch_alive = tf.cast(
            tf.logical_or(tf.cast(img_first, dtype=tf.bool),
                          tf.cast(bev_first, dtype=tf.bool)),
            dtype=tf.float32)

        # Fallback decisions from the third flip; the two conditions are
        # complementary, so exactly one of these is 1.0.
        img_fallback = _coin(tf.greater(random_values[2], 0.5))
        bev_fallback = _coin(tf.less_equal(random_values[2], 0.5))

        # tf.case needs callables, hence the lambdas around the tensors.
        survived = tf.equal(any_branch_alive, 1)
        final_img_mask = tf.case([(survived, lambda: img_first)],
                                 default=lambda: img_fallback)

        survived = tf.equal(any_branch_alive, 1)
        final_bev_mask = tf.case([(survived, lambda: bev_first)],
                                 default=lambda: bev_fallback)

        return final_img_mask, final_bev_mask

2 数据融合到rpn,也就是利用卷积网络对其进行分类(二分类:背景和物体)和回归

 # Fuse the BEV and image ROI crops.
        with tf.variable_scope('proposal_roi_fusion'):
            rpn_fusion_out = None
            # 'mean' fusion: element-wise sum divided by the fusion factor.
            if self._fusion_method == 'mean':
                tf_features_sum = tf.add(bev_proposal_rois, img_proposal_rois)
                # The divisor is 2.0 for a plain average, or the sum of the
                # path drop masks when path drop is active.
                rpn_fusion_out = tf.divide(tf_features_sum,
                                           fusion_mean_div_factor)
            elif self._fusion_method == 'concat':
                # 'concat' fusion: stack the two crops along axis 3.
                rpn_fusion_out = tf.concat(
                    [bev_proposal_rois, img_proposal_rois], axis=3)
            else:
                raise ValueError('Invalid fusion method', self._fusion_method)

        # TODO: move this section into an separate AnchorPredictor class
        with tf.variable_scope('anchor_predictor', 'ap', [rpn_fusion_out]):
            # The fused ROI features are the input of both heads below.
            tensor_in = rpn_fusion_out

            # Parse rpn layers config
            layers_config = self._config.layers_config.rpn_config
            # NOTE(review): the original annotation gives 0.0005 as the
            # configured value - confirm against the config.
            l2_weight_decay = layers_config.l2_weight_decay

            if l2_weight_decay > 0:
                # L2 regularisation on the conv weights.
                weights_regularizer = slim.l2_regularizer(l2_weight_decay)
            else:
                weights_regularizer = None

            with slim.arg_scope([slim.conv2d],
                                weights_regularizer=weights_regularizer):
                # Use conv2d instead of fully_connected layers.
                # The kernel size equals the ROI crop size and padding is
                # 'VALID', so this conv acts like a fully connected layer
                # over each crop.
                # NOTE(review): the original annotation gives 256 outputs
                # and a 6x6 kernel - confirm against the config.
                cls_fc6 = slim.conv2d(tensor_in,
                                      layers_config.cls_fc6,
                                      self._proposal_roi_crop_size,
                                      padding='VALID',
                                      scope='cls_fc6')
                # NOTE(review): keep_prob is 0.5 per the original
                # annotation - confirm against the config.
                cls_fc6_drop = slim.dropout(cls_fc6,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='cls_fc6_drop')
                # NOTE(review): 256 outputs per the original annotation.
                cls_fc7 = slim.conv2d(cls_fc6_drop,
                                      layers_config.cls_fc7,
                                      [1, 1],
                                      scope='cls_fc7')

                cls_fc7_drop = slim.dropout(cls_fc7,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='cls_fc7_drop')
                # Two outputs per anchor for the classification head, with
                # no activation applied.
                cls_fc8 = slim.conv2d(cls_fc7_drop,
                                      2,
                                      [1, 1],
                                      activation_fn=None,
                                      scope='cls_fc8')
                # Remove axes 1 and 2, which tf.squeeze requires to be of
                # size 1.
                objectness = tf.squeeze(
                    cls_fc8, [1, 2],
                    name='cls_fc8/squeezed')

                # Use conv2d instead of fully_connected layers.
                # Regression head; same layer layout as the classification
                # head above.
                reg_fc6 = slim.conv2d(tensor_in,
                                      layers_config.reg_fc6,
                                      self._proposal_roi_crop_size,
                                      padding='VALID',
                                      scope='reg_fc6')
                # Dropout with the same keep_prob as the classification head.
                reg_fc6_drop = slim.dropout(reg_fc6,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='reg_fc6_drop')
                # NOTE(review): 256 outputs per the original annotation.
                reg_fc7 = slim.conv2d(reg_fc6_drop,
                                      layers_config.reg_fc7,
                                      [1, 1],
                                      scope='reg_fc7')

                reg_fc7_drop = slim.dropout(reg_fc7,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='reg_fc7_drop')
                # Six regression outputs per anchor, with no activation.
                # NOTE(review): per the original annotation these are the
                # centre offsets (dt_x, dt_y, dt_z) and the size offsets
                # (dd_x, dd_y, dd_z) - confirm.
                reg_fc8 = slim.conv2d(reg_fc7_drop,
                                      6,
                                      [1, 1],
                                      activation_fn=None,
                                      scope='reg_fc8')
                # Remove axes 1 and 2 here as well.
                offsets = tf.squeeze(
                    reg_fc8, [1, 2],
                    name='reg_fc8/squeezed')

你可能感兴趣的:(AVOD,AVOD,点云,深度学习,代码理解,物体检测)