TensorFlow开源的Object Detection API源码解析(三):faster_rcnn_inception_resnet_v2_feature_extractor.py

对于faster_rcnn_meta_arch.py的理解见这篇文章
对于inception_resnet_v2.py的理解见这篇文章


"""Inception Resnet v2 Faster R-CNN implementation.

See "Inception-v4, Inception-ResNet and the Impact of Residual Connections on
Learning" by Szegedy et al. (https://arxiv.org/abs/1602.07261)
as well as
"Speed/accuracy trade-offs for modern convolutional object detectors" by
Huang et al. (https://arxiv.org/abs/1611.10012)
"""

import tensorflow as tf

from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.net import inception_resnet_v2

slim = tf.contrib.slim


class FasterRCNNInceptionResnetV2FeatureExtractor(
    faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
  """Faster R-CNN feature extractor backed by Inception ResNet v2."""

  def __init__(self,
               is_training,
               first_stage_features_stride,
               reuse_weights=None,
               weight_decay=0.0):
    """Constructor.

    Args:
      is_training: See the base class constructor in faster_rcnn_meta_arch.
      first_stage_features_stride: See the base class constructor. Only 8 and
        16 are accepted here.
      reuse_weights: See the base class constructor.
      weight_decay: See the base class constructor.

    Raises:
      ValueError: If `first_stage_features_stride` is neither 8 nor 16.
    """
    if first_stage_features_stride not in (8, 16):
      raise ValueError('`first_stage_features_stride` must be 8 or 16.')
    super(FasterRCNNInceptionResnetV2FeatureExtractor, self).__init__(
        is_training, first_stage_features_stride, reuse_weights, weight_decay)

  def preprocess(self, resized_inputs):
    """Faster R-CNN with Inception Resnet v2 preprocessing.

    Maps pixel values to the range [-1, 1].

    Args:
      resized_inputs: A [batch, height_in, width_in, channels] float32 tensor
        representing a batch of images with values between 0 and 255.0.

    Returns:
      preprocessed_inputs: A [batch, height_out, width_out, channels] float32
        tensor representing a batch of images.
    """
    # Linear rescaling: x / 255 lies in [0, 1], so 2 * x / 255 - 1 lies in
    # [-1, 1] (0 -> -1.0, 255 -> 1.0).
    return (2.0 / 255.0) * resized_inputs - 1.0

  def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts the first stage features that will be fed to the RPN.

    Implements the abstract method of the base class in faster_rcnn_meta_arch.
    Features are taken from the first half of the Inception ResNet v2 network
    (up to the 'PreAuxLogits' endpoint). The network is built with
    `align_feature_maps=True`, which, per the original documentation, means
    that all VALID paddings in the network are changed to SAME padding so that
    the feature maps are aligned.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name. Not used by this implementation: variables are
        always created under the 'InceptionResnetV2' variable scope.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth].

    Raises:
      InvalidArgumentError: Per the original documentation, if the spatial
        size of `preprocessed_inputs` (height or width) is less than 33.
        There is no explicit check for this in this method.
      ValueError: If `preprocessed_inputs` is not a 4-dimensional tensor.
    """
    if len(preprocessed_inputs.get_shape().as_list()) != 4:
      raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a '
                       'tensor of shape %s' % preprocessed_inputs.get_shape())

    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope(
        weight_decay=self._weight_decay)):
      # Forces is_training to False to disable batch norm updates.
      with slim.arg_scope([slim.batch_norm], is_training=False):
        # A variable_scope allows variables to be shared (see `reuse`).
        # For name_scope vs. variable_scope see:
        # http://sentiment-mining.blogspot.com/2016/12/tensorflow-name-scope-variable-scope.html
        # The scope object is bound to its own name so that the `scope`
        # argument of this method is not silently overwritten.
        with tf.variable_scope('InceptionResnetV2',
                               reuse=self._reuse_weights) as var_scope:
          rpn_feature_map, _ = (
              inception_resnet_v2.inception_resnet_v2_base(
                  preprocessed_inputs, final_endpoint='PreAuxLogits',
                  scope=var_scope,
                  output_stride=self._first_stage_features_stride,
                  align_feature_maps=True))
    return rpn_feature_map

  def _extract_box_classifier_features(self, proposal_feature_maps, scope):
    """Extracts second stage box classifier features.

    This function reconstructs the "second half" of the Inception ResNet v2
    network, i.e. the layers that come after the part built in
    `_extract_proposal_features`: the Mixed_7a reduction block, nine repeated
    block8 units, one final block8 without activation and a 1x1 convolution
    with 1536 output channels.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name. Not used by this implementation: variables are
        always created under the 'InceptionResnetV2' variable scope.

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
    with tf.variable_scope('InceptionResnetV2', reuse=self._reuse_weights):
      with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope(
          weight_decay=self._weight_decay)):
        # Forces is_training to False to disable batch norm updates.
        with slim.arg_scope([slim.batch_norm], is_training=False):
          # Defaults for the layers below; the stride-2 reduction layers
          # override them explicitly with stride=2 / padding='VALID'.
          with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                              stride=1, padding='SAME'):
            with tf.variable_scope('Mixed_7a'):
              with tf.variable_scope('Branch_0'):
                branch_0 = slim.conv2d(
                    proposal_feature_maps, 256, 1, scope='Conv2d_0a_1x1')
                branch_0 = slim.conv2d(
                    branch_0, 384, 3, stride=2, padding='VALID',
                    scope='Conv2d_1a_3x3')
              with tf.variable_scope('Branch_1'):
                branch_1 = slim.conv2d(
                    proposal_feature_maps, 256, 1, scope='Conv2d_0a_1x1')
                branch_1 = slim.conv2d(
                    branch_1, 288, 3, stride=2, padding='VALID',
                    scope='Conv2d_1a_3x3')
              with tf.variable_scope('Branch_2'):
                branch_2 = slim.conv2d(
                    proposal_feature_maps, 256, 1, scope='Conv2d_0a_1x1')
                branch_2 = slim.conv2d(
                    branch_2, 288, 3, scope='Conv2d_0b_3x3')
                branch_2 = slim.conv2d(
                    branch_2, 320, 3, stride=2, padding='VALID',
                    scope='Conv2d_1a_3x3')
              with tf.variable_scope('Branch_3'):
                branch_3 = slim.max_pool2d(
                    proposal_feature_maps, 3, stride=2, padding='VALID',
                    scope='MaxPool_1a_3x3')
              # Concatenate the four branches along the channel axis.
              net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
            net = slim.repeat(net, 9, inception_resnet_v2.block8, scale=0.20)
            net = inception_resnet_v2.block8(net, activation_fn=None)
            proposal_classifier_features = slim.conv2d(
                net, 1536, 1, scope='Conv2d_7b_1x1')
        return proposal_classifier_features

  def restore_from_classification_checkpoint_fn(
      self,
      first_stage_feature_extractor_scope,
      second_stage_feature_extractor_scope):
    """Returns a map of variables to load from a foreign checkpoint.

    Note that this overrides the default implementation in
    faster_rcnn_meta_arch.FasterRCNNFeatureExtractor, which does not work for
    InceptionResnetV2 checkpoints.

    TODO: revisit whether it's possible to force the
    `Repeat` namescope as created in `_extract_box_classifier_features` to
    start counting at 2 (e.g. `Repeat_2`) so that the default restore_fn can
    be used.

    Args:
      first_stage_feature_extractor_scope: A scope name for the first stage
        feature extractor.
      second_stage_feature_extractor_scope: A scope name for the second stage
        feature extractor.

    Returns:
      A dict mapping variable names (to load from a checkpoint) to variables in
      the model graph. (Original annotator's note: Huang Wenjian's book
      describes this kind of dictionary.)
    """

    def _strip_leading(name, prefix):
      # Remove `prefix` only when it is at the very start of `name`.
      # `str.replace` would also rewrite any later occurrence of the scope
      # string inside the variable name and corrupt the checkpoint key.
      if name.startswith(prefix):
        return name[len(prefix):]
      return name

    first_prefix = first_stage_feature_extractor_scope + '/'
    second_prefix = second_stage_feature_extractor_scope + '/'
    # The repeated block8 units built in the second stage live under
    # `Repeat`, while the checkpoint stores them under `Repeat_2`.
    repeat_prefix = second_prefix + 'InceptionResnetV2/Repeat'

    variables_to_restore = {}
    for variable in tf.global_variables():
      op_name = variable.op.name
      if op_name.startswith(first_stage_feature_extractor_scope):
        var_name = _strip_leading(op_name, first_prefix)
        variables_to_restore[var_name] = variable
      if op_name.startswith(second_stage_feature_extractor_scope):
        if op_name.startswith(repeat_prefix):
          var_name = ('InceptionResnetV2/Repeat_2'
                      + op_name[len(repeat_prefix):])
        else:
          var_name = _strip_leading(op_name, second_prefix)
        variables_to_restore[var_name] = variable
    return variables_to_restore

你可能感兴趣的:(Deep/Machine,Learning)