import 部分
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim import losses
from tensorflow.contrib.slim import arg_scope
import numpy as np
from layer_utils.snippets import generate_anchors_pre, generate_anchors_pre_tf
from layer_utils.proposal_layer import proposal_layer, proposal_layer_tf
from layer_utils.proposal_top_layer import proposal_top_layer, proposal_top_layer_tf
from layer_utils.anchor_target_layer import anchor_target_layer
from layer_utils.proposal_target_layer import proposal_target_layer
from utils.visualization import draw_bounding_boxes
from model.config import cfg
构建网络的“总”函数
def _build_network(self, is_training=True):
# select initializers
if cfg.TRAIN.TRUNCATED:
initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
else:
initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
net_conv = self._image_to_head(is_training)
with tf.variable_scope(self._scope, self._scope):
# build the anchors for the image
self._anchor_component()
# region proposal network
rois = self._region_proposal(net_conv, is_training, initializer)
# region of interest pooling
if cfg.POOLING_MODE == 'crop':
pool5 = self._crop_pool_layer(net_conv, rois, "pool5")
else:
raise NotImplementedError
fc7 = self._head_to_tail(pool5, is_training)
with tf.variable_scope(self._scope, self._scope):
# region classification
cls_prob, bbox_pred = self._region_classification(fc7, is_training,
initializer, initializer_bbox)
self._score_summaries.update(self._predictions)
return rois, cls_prob, bbox_pred
1 使用 initializer 设置box等参数
if cfg.TRAIN.TRUNCATED:
initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
else:
initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
2 self._anchor_component()
调用snippets.py文件中的
generate_anchors_pre
generate_anchors_pre_tf
产生anchors,返回 anchors 的参数(方框位置)和anchors的数量 anchor_length
anchors.set_shape([None, 4]) # 每个anchor使用4个参数描述其位置
anchor_length.set_shape([]) # number of anchors
self._anchors = anchors # 把anchors的位置信息赋给类变量 _anchors
self._anchor_length = anchor_length # 类似,把信息赋给类变量
3 rois = self._region_proposal(net_conv, is_training, initializer):
构建RPN网络(各语句信息见代码注释)
def _region_proposal(self, net_conv, is_training, initializer):
rpn = slim.conv2d(net_conv, cfg.RPN_CHANNELS, [3, 3], trainable=is_training, weights_initializer=initializer,
scope="rpn_conv/3x3")
# 在分类网络输出的feature map基础上做一次 3x3 卷积,输出的 rpn 将被cls_score_layer和bbox_pred_layer用作输入
self._act_summaries.append(rpn)
rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training,
weights_initializer=initializer,
padding='VALID', activation_fn=None, scope='rpn_cls_score')
# cls_score_layer 1x1卷积 得到各类别的分数(scores),经过softmax计算概率值(prob),argmax得到格式为 [... 1 0 ...] 的预测结果(pred)。中间经过多次reshape。
# change it so that the score has 2 as its channel size
rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
# 预测结果 pred
rpn_cls_pred = tf.argmax(tf.reshape(rpn_cls_score_reshape, [-1, 2]), axis=1, name="rpn_cls_pred")
# 概率值 prob
rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
# bbox_pred层 1x1卷积,输出 _num_anchors*4 个值,每个anchor使用4个值描述框的位置
rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training,
weights_initializer=initializer,
padding='VALID', activation_fn=None, scope='rpn_bbox_pred')
if is_training:
# 利用 cls_prob和bbox_pred 的信息,在proposal layer合成rois(即 region proposals); 另一个值 roi_scores 是 roi 对应的 cls_prob 值,即框内物体属于各类目标的概率值
rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
# 利用 cls_score 的信息为anchors分配标签信息 : 1=前景 0=背景 -1=忽略
rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
# Try to have a deterministic order for the computing graph, for reproducibility
with tf.control_dependencies([rpn_labels]):
rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
else:
if cfg.TEST.MODE == 'nms':
rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
elif cfg.TEST.MODE == 'top':
rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
else:
raise NotImplementedError
# 保存信息到 类变量
self._predictions["rpn_cls_score"] = rpn_cls_score
self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
self._predictions["rpn_cls_prob"] = rpn_cls_prob
self._predictions["rpn_cls_pred"] = rpn_cls_pred
self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
self._predictions["rois"] = rois
return rois
关于 ROI pooling 的代码还不清楚,暂且跳过
4 self._region_classification()
最后两个并行的全连接层,很简单,直接把代码贴在下面
def _region_classification(self, fc7, is_training, initializer, initializer_bbox):
cls_score = slim.fully_connected(fc7, self._num_classes,
weights_initializer=initializer,
trainable=is_training,
activation_fn=None, scope='cls_score')
cls_prob = self._softmax_layer(cls_score, "cls_prob")
cls_pred = tf.argmax(cls_score, axis=1, name="cls_pred")
bbox_pred = slim.fully_connected(fc7, self._num_classes * 4,
weights_initializer=initializer_bbox,
trainable=is_training,
activation_fn=None, scope='bbox_pred')
self._predictions["cls_score"] = cls_score
self._predictions["cls_pred"] = cls_pred
self._predictions["cls_prob"] = cls_prob
self._predictions["bbox_pred"] = bbox_pred
return cls_prob, bbox_pred