Tensorflow---Faster RCNN网络(预测过程)(四)

Tensorflow—Faster RCNN网络(预测过程)(四)

https://blog.csdn.net/weixin_42206075/article/details/112026162
https://blog.csdn.net/weixin_42206075/article/details/111984086

上面两篇blog中,已经对Faster RCNN的训练阶段进行了解释,下面就是Faster RCNN的预测过程的代码实现。
Tensorflow---Faster RCNN网络(预测过程)(四)_第1张图片
依次运行 step1~step4 的 py 文件之后,会生成上图所示的三个文件:其中两个是训练得到的权重文件,另一个是保存 RPN 生成的 ROI 框的字典。之后运行 inference.py 文件即可进行预测。

unified_network\config.py

# Detection categories, 20 in total.
CLASSES = [
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]

# Anchor box aspect ratios.
RATIO = [0.5, 1.0, 2.0]
# Side lengths of the square anchor boxes.
SCALE = [128, 256, 512]
# RoI pooling output height.
POOLED_H = 7
# RoI pooling output width.
POOLED_W = 7

# Height of the images fed into the Faster R-CNN network.
IMG_H = 600
# Width of the images fed into the Faster R-CNN network.
IMG_W = 800
# NMS threshold (original note: used when selecting RPN positive samples
# during training).
NMS_THRESHOLD = 0.7
# Number of ROI boxes the RPN hands to Faster R-CNN: 2000 ROIs per image are
# generated in the training stage, 300 at prediction time.
NUMS_PROPOSAL = 300

# NOTE(review): undocumented in the original; presumably the number of
# anchors sampled per image for the RPN loss - confirm.
MINIBATCH = 256
# Training batch size.
BATCHSIZE = 1
# Initial learning rate.
LEARNING_RATE = 0.001
# Weight-decay coefficient.
WEIGHT_DECAY = 0.0005
# Momentum optimizer parameter.
MOMENTUM = 0.9

# Smoothing constant.
EPSILON = 1e-10

# Directory of the XML annotation files.
XML_PATH = "./VOCdevkit/VOC2007/Annotations/"
# Directory of the image files.
IMG_PATH = "./VOCdevkit/VOC2007/JPEGImages/"

***unified_network\vggnet.py(vgg_16函数)***
作用:同rpn_proposal\vggnet.py,构建vgg16模型,返回最后一层的feature map作为RPN的输入

def vgg_16(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           scope='vgg_16'):
  """Build the VGG-16 convolutional stages and return the conv5_3 feature map.

  Only conv1..conv5 (with pool1..pool4 in between) are built; there is no
  pool5 and no classifier head. The conv1 and conv2 stages are created with
  trainable=False.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: unused; kept so existing callers keep working.
    is_training: unused; kept so existing callers keep working.
    dropout_keep_prob: unused; kept so existing callers keep working.
    scope: Optional scope for the variables.

  Returns:
    The output tensor of the last conv5 layer (conv5_3).
  """
  # Preprocessing: subtract per-channel constants, then divide by 255.
  # Written without in-place operators so the caller's object is never
  # modified.
  # NOTE(review): presumably the per-channel RGB means; must match the
  # preprocessing used when the checkpoints were trained - confirm.
  inputs = inputs - tf.constant([123.68, 116.779, 103.939])
  inputs = inputs / 255
  with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers_lib.repeat(
          inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1', trainable=False)
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1', padding="SAME")
      net = layers_lib.repeat(
          net, 2, layers.conv2d, 128, [3, 3], scope='conv2', trainable=False)
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2', padding="SAME")
      net = layers_lib.repeat(net, 3, layers.conv2d, 256, [3, 3], scope='conv3')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool3', padding="SAME")
      net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv4')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool4', padding="SAME")
      net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv5')
      # `net` is the conv5_3 output. Returning it directly, instead of
      # looking up the hard-coded "vgg_16/conv5/conv5_3" end-point key,
      # keeps the function working when `scope` is not 'vgg_16'.
      return net

***unified_network\ops.py(rpn2proposal函数)***
作用:将rpn生成出来的两个预测head(分类与回归)进行偏移量的转换,然后进行nms操作过滤,最后返回过滤完的预测框

def rpn2proposal(cls, reg, anchors):
    """Decode the RPN heads into boxes and keep the top proposals after NMS.

    Args:
        cls: RPN classification output; only batch element 0 is used and it
            is soft-maxed over its last axis.
        reg: RPN regression offsets; only batch element 0 is used.
        anchors: anchor boxes, converted here to a float32 constant.

    Returns:
        A tuple (normal_bbox, reverse_bbox, bbox_idx):
        normal_bbox: the NMS-selected boxes in the first layout returned by
            offset2bbox, not normalised.
        reverse_bbox: the same boxes in the second layout returned by
            offset2bbox, divided by [IMG_H, IMG_W, IMG_H, IMG_W].
        bbox_idx: int32 zeros of length NUMS_PROPOSAL.
    """
    cls, reg = cls[0], reg[0]
    anchors = tf.constant(anchors, dtype=tf.float32)

    # reg and anchors both have shape (17100, 4) per the original note.
    normal_bbox, reverse_bbox = offset2bbox(reg, anchors)
    # NOTE(review): column 1 is presumably the foreground probability - confirm.
    score = tf.nn.softmax(cls)[:, 1]

    keep = tf.image.non_max_suppression(
        reverse_bbox, score,
        max_output_size=NUMS_PROPOSAL, iou_threshold=NMS_THRESHOLD)
    reverse_bbox = tf.nn.embedding_lookup(reverse_bbox, keep)
    normal_bbox = tf.nn.embedding_lookup(normal_bbox, keep)

    # The divisor must be float32: a bare tf.constant of Python ints is int32,
    # and dividing a float32 tensor by an int32 tensor raises a TypeError.
    # NOTE(review): the H, W, H, W order implies reverse_bbox is laid out as
    # [y0, x0, y1, x1] - confirm against offset2bbox.
    img_size = tf.constant([[IMG_H, IMG_W, IMG_H, IMG_W]], dtype=tf.float32)
    reverse_bbox = reverse_bbox / img_size  # normalise the coordinates

    # Every proposal gets index 0 (presumably the batch index consumed by the
    # ROI pooling step - confirm).
    # NOTE(review): the length is fixed at NUMS_PROPOSAL, while NMS returns
    # at most that many boxes; the two lengths differ if NMS keeps fewer.
    bbox_idx = tf.zeros([NUMS_PROPOSAL], dtype=tf.int32)
    return normal_bbox, reverse_bbox, bbox_idx

***unified_network\vggnet.py(roi_fc函数)***
作用:构建faster rcnn中最后一层roi pooling和两层全连接

def roi_fc(inputs, boxes, box_idx, scope='vgg_16', is_training=False):
  """Apply ROI pooling followed by the fc6/fc7 layers (built as convolutions).

  Args:
    inputs: feature map passed to roi_pooling.
    boxes: ROI boxes passed to roi_pooling.
    box_idx: per-box index passed to roi_pooling.
    scope: Optional scope for the variables.
    is_training: whether the two dropout layers are active. Defaults to
      False because this network is used for prediction, where active
      dropout makes the outputs stochastic; pass True to enable dropout
      (keep probability 0.5).

  Returns:
    The fc7 output (after dropout7) for every ROI.
  """
  with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
            [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
            outputs_collections=end_points_collection):
      # Use conv2d instead of fully_connected layers.
      net = roi_pooling(inputs, boxes, box_idx)
      # A 7x7 VALID convolution over the pooled ROI plays the role of fc6.
      net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = layers_lib.dropout(
          net, 0.5, is_training=is_training, scope='dropout6')
      net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
      net = layers_lib.dropout(
          net, 0.5, is_training=is_training, scope='dropout7')

      return net

***unified_network\networks.py(unified_net函数)***
作用:输入一张原始图片,返回faster rcnn的最终回归head和分类head,还有经过rpn进行筛选的300个ROI框

def unified_net(inputs, anchors):
    """Run the full detector on one image batch.

    Chains vgg_16 -> rpn -> rpn2proposal -> roi_fc and the two final
    fully connected heads.

    Args:
        inputs: image tensor passed to vgg_16.
        anchors: anchor boxes forwarded to rpn2proposal.

    Returns:
        A tuple (cls, reg, normal_bbox): the classification head with
        len(CLASSES) + 1 outputs, the regression head with 4 outputs, and
        the first box set returned by rpn2proposal.
    """
    # Shared backbone features; shape=(1, 38, 50, 512) per the original note.
    feature_map = vgg_16(inputs)

    # shape=(1, 38*50*9, 2) and shape=(1, 38*50*9, 4) per the original note.
    rpn_cls, rpn_reg = rpn(feature_map)
    proposals = rpn2proposal(rpn_cls, rpn_reg, anchors)
    normal_bbox, reverse_bbox, bbox_idx = proposals

    # Per-ROI features; the two spatial axes are squeezed away.
    roi_features = roi_fc(feature_map, reverse_bbox, bbox_idx)
    roi_features = tf.squeeze(roi_features, axis=[1, 2])

    num_outputs = len(CLASSES) + 1
    cls = fully_connected("classification", roi_features, num_outputs)
    reg = fully_connected("regression", roi_features, 4)
    return cls, reg, normal_bbox

inference.py
作用:主文件

import tensorflow as tf
import numpy as np
from PIL import Image

from unified_network.networks import unified_net
from unified_network.ops import offset2bbox, non_max_suppression
from rpn_proposal.utils import generate_anchors, draw_box
from unified_network.config import IMG_H, IMG_W


anchors = generate_anchors()


def inference(img_path="./road.jpg"):
    """Run the trained detector on one image and display the drawn boxes.

    Builds the unified network, restores the Fast R-CNN and RPN checkpoints,
    runs the detector on the image and shows the result of draw_box.

    Args:
        img_path: path of the image to run detection on.
    """
    imgs = tf.placeholder(tf.float32, [1, IMG_H, IMG_W, 3])
    # shape=(300, 21), shape=(300, 4), shape=(300, 4) per the original note.
    cls, reg, proposal = unified_net(imgs, anchors)

    # The preliminary RPN proposals are re-encoded from corner form to
    # (x, y, w, h) before the final regression offsets are decoded.
    x0, y0, x1, y1 = proposal[:, 0:1], proposal[:, 1:2], proposal[:, 2:3], proposal[:, 3:4]
    x, y, w, h = (x0 + x1) / 2, (y0 + y1) / 2, x1 - x0, y1 - y0
    proposal = tf.concat([x, y, w, h], axis=1)
    normal_bbox, reverse_bbox = offset2bbox(reg, proposal)
    cls = tf.nn.softmax(cls)
    boxes, score, classes = non_max_suppression(cls, reverse_bbox)

    # Force three channels so grayscale / RGBA / palette images still match
    # the [1, IMG_H, IMG_W, 3] placeholder, and close the file when done.
    with Image.open(img_path) as img_file:
        image = np.array(img_file.convert("RGB").resize((IMG_W, IMG_H)))

    fast_rcnn_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="vgg_16") + \
                    tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="classification") + \
                    tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="regression")
    rpn_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn")

    # The context manager releases the session's resources on any exit path.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # The two checkpoints hold disjoint variable sets, hence two savers.
        tf.train.Saver(fast_rcnn_var).restore(sess, "./fast_rcnn/model/model.ckpt")
        tf.train.Saver(rpn_var).restore(sess, "./rpn_proposal/model/model.ckpt")

        bbox, _score, cls_ids = sess.run(
            [boxes, score, classes], feed_dict={imgs: image[np.newaxis]})

    # Corner form -> (x, y, w, h), the form handed to draw_box.
    bx0, by0, bx1, by1 = bbox[:, 0:1], bbox[:, 1:2], bbox[:, 2:3], bbox[:, 3:4]
    bbox = np.concatenate(
        ((bx0 + bx1) / 2, (by0 + by1) / 2, bx1 - bx0, by1 - by0), axis=-1)
    Image.fromarray(np.uint8(draw_box(image, bbox, cls_ids))).show()


if __name__ == "__main__":
    inference()

你可能感兴趣的:(Faster,R-CNN,目标检测,tensorflow,网络,tensorflow,深度学习,机器学习,计算机视觉)