OpenPose -tensorflow代码解析(3)—— 网络结构的搭建 Net.py

前言

该openpose-tensorflow的工程是自己实现的,所以有些地方写的会比较简单,但阅读性强、方便使用。

论文翻译 || openpose – Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields
工程实现 || 基于opencv使用openpose完成人体姿态估计

OpenPose -tensorflow代码解析(1)——工程概述&&训练前的准备
OpenPose -tensorflow代码解析(2)—— 数据增强和处理 dataset.py
OpenPose -tensorflow代码解析(3)—— 网络结构的搭建 Net.py
OpenPose -tensorflow代码解析(4)—— 训练脚本 train.py
OpenPose -tensorflow代码解析(5)—— 预测代码解析 predict.py

1 openpose 的网络结构

  • 【网络结构】
    openpose的 基础网络选择的是 vgg16,出来的特征图再经过两个通道,完成关键点、连接方式的预测。
    每个stage出来的特征图,会和基础网络的输出特征图进行concat,再传入下一个stage,然后重复多个stage(可设置)。并且在每个stage之后进行loss的设置。
    下面附上论文中的结构图。以及 在 tensorboard中显示的网络结构图、stage内部的结构图。
  • 【loss function】
    对于每个stage 输出的特征图,都会和label进行损失计算。计算方式是 均方差。可以理解为中继监督,让网络在每一个阶段,都向着label的方向进行收敛,加快网络的训练和预测的精度

    OpenPose -tensorflow代码解析(3)—— 网络结构的搭建 Net.py_第1张图片
    ==========================================================================================================OpenPose -tensorflow代码解析(3)—— 网络结构的搭建 Net.py_第2张图片
    ==========================================================================================================OpenPose -tensorflow代码解析(3)—— 网络结构的搭建 Net.py_第3张图片
    ==========================================================================================================
# NET.py 定义了网络结构和 loss function

from opt import *
from base_net import *
import tensorflow as tf


class OpenPose(object):
   def __init__(self, input_data, trainable):
       self.cpm_num = cfg.OP.cpm_num+1   # 关键点数量 + 背景类
       self.paf_num = cfg.OP.paf_num             # 关键点连接数量×2
       self.trainable = trainable
       self.PAF = []
       self.CPM = []
       self.__build_network(input_data)

   def __build_network(self, input_data):

       with tf.variable_scope("MODEL"):

           with tf.variable_scope("base"):
               x = convolutional(input_data,  64, (3, 3), (1, 1), trainable=self.trainable, name="conv1_1")
               x = convolutional(x, 64, (3, 3), (1, 1), trainable=self.trainable, name="conv1_2")
               x = max_pool(x, (2, 2), (2, 2), name='pool1', padding='VALID')
               x = convolutional(x, 128, (3, 3), (1, 1), trainable=self.trainable, name='conv2_1')
               x = convolutional(x, 128, (3, 3), (1, 1), trainable=self.trainable, name='conv2_2')
              x = max_pool(x, (2, 2), (2, 2), name='pool2', padding='VALID')
               x = convolutional(x, 256, (3, 3), (1, 1), trainable=self.trainable, name='conv3_1')
               x = convolutional(x, 256, (3, 3), (1, 1), trainable=self.trainable, name='conv3_2')
               x = convolutional(x, 256, (3, 3), (1, 1), trainable=self.trainable, name='conv3_3')
               x = convolutional(x, 256, (3, 3), (1, 1), trainable=self.trainable, name='conv3_4')
               x = max_pool(x, (2, 2), (2, 2), name='pool3', padding='VALID')
               x = convolutional(x, 512, (3, 3), (1, 1), trainable=self.trainable,  name='conv4_1')
               x = convolutional(x, 512, (3, 3), (1, 1), trainable=self.trainable, name='conv4_2')
               x = convolutional(x, 256, (3, 3), (1, 1), trainable=self.trainable, name='conv4_3_CPM')
               base = convolutional(x, 128, (3, 3), (1, 1), trainable=self.trainable, name='conv4_4_CPM',bn=False)

           with tf.variable_scope("stage1"):
               with tf.variable_scope("PAF"):
                   x = convolutional(base, 128, (3, 3), (1, 1), trainable=self.trainable, name='conv1')
                   x = convolutional(x, 128, (3, 3), (1, 1), trainable=self.trainable, name='conv2')
                   x = convolutional(x, 128, (3, 3), (1, 1), trainable=self.trainable, name='conv3')
                   x = convolutional(x, 512, (1, 1), (1, 1), trainable=self.trainable, name='conv4')
                   paf_stage = convolutional(x, self.paf_num, (1, 1), (1, 1), trainable=self.trainable, name='conv5', activate=False, bn=False)
                   self.PAF.append(paf_stage)

               with tf.variable_scope("CPM"):
                   x = convolutional(base, 128, (3, 3), (1, 1), trainable=self.trainable, name='conv1')
                   x = convolutional(x, 128,  (3, 3), (1, 1), trainable=self.trainable, name='conv2')
                   x = convolutional(x, 128, (3, 3), (1, 1), trainable=self.trainable, name='conv3')
                   x = convolutional(x, 512, (1, 1), (1, 1), trainable=self.trainable, name='conv4')
                   cpm_stage = convolutional(x, self.cpm_num, (1, 1), (1, 1), trainable=self.trainable, name='conv5', activate=False,bn=False)
                   self.CPM.append(cpm_stage)

           for stage in range(2, 7):
               with tf.variable_scope("stage{}".format(stage)):

                   concat = route([paf_stage,cpm_stage, base], name="concat")
                   with tf.variable_scope("PAF"):
                       x = convolutional(concat, 128, (7, 7), (1, 1), trainable=self.trainable, name='conv1')
                       x = convolutional(x, 128, (7, 7), (1, 1), trainable=self.trainable, name='conv2')
                       x = convolutional(x, 128, (7, 7), (1, 1), trainable=self.trainable, name='conv3')
                       x = convolutional(x, 128, (7, 7), (1, 1), trainable=self.trainable, name='conv4')
                       x = convolutional(x, 128, (7, 7), (1, 1), trainable=self.trainable, name='conv5')
                       x = convolutional(x, 128, (1, 1), (1, 1), trainable=self.trainable, name='conv6')
                       paf_stage = convolutional(x, self.paf_num, (1, 1), (1, 1), trainable=self.trainable, name='conv7', activate=False,bn=False)
                       self.PAF.append(paf_stage)

                   with tf.variable_scope("CPM"):
                       x = convolutional(concat, 128, (7, 7), (1, 1), trainable=self.trainable, name='conv1')
                       x = convolutional(x, 128, (7, 7), (1, 1), trainable=self.trainable, name='conv2')
                       x = convolutional(x, 128, (7, 7), (1, 1), trainable=self.trainable, name='conv3')
                       x = convolutional(x, 128, (7, 7), (1, 1), trainable=self.trainable, name='conv4')
                       x = convolutional(x, 128, (7, 7), (1, 1), trainable=self.trainable, name='conv5')
                       x = convolutional(x, 128, (1, 1), (1, 1), trainable=self.trainable, name='conv6')
                       cpm_stage = convolutional(x, self.cpm_num, (1, 1), (1, 1), trainable=self.trainable, name='conv7', activate=False,bn=False)
                       self.CPM.append(cpm_stage)



   def loss_layer(self, heatmap_node, vectmap_node):

       with tf.device(tf.DeviceSpec(device_type="GPU")):

           losses = []
           last_losses_l1 = []
           last_losses_l2 = []

           for idx, (PAF, CPM) in enumerate(zip(self.PAF, self.CPM)):
               # loss_l1 = tf.nn.l2_loss(tf.concat(PAF, axis=0) - vectmap_node, name='loss_l1_stage%d' % (idx))
               # loss_l2 = tf.nn.l2_loss(tf.concat(CPM, axis=0) - heatmap_node, name='loss_l2_stage%d' % (idx))
               loss_l1 = tf.reduce_mean((tf.concat(PAF, axis=0) - vectmap_node)**2, name='loss_l1_stage%d' % (idx))
               loss_l2 = tf.reduce_mean((tf.concat(CPM, axis=0) - heatmap_node)**2, name='loss_l2_stage%d' % (idx))
               losses.append(tf.reduce_mean([loss_l1, loss_l2]))
           last_losses_l1.append(loss_l1)
           last_losses_l2.append(loss_l2)

           self.total_loss = tf.reduce_mean(losses)
           self.total_loss_paf = tf.reduce_mean(last_losses_l1)
           self.total_loss_heat = tf.reduce_mean(last_losses_l2)
           self.total_loss_ll = self.total_loss_paf + self.total_loss_heat

       return self.total_loss, self.total_loss_paf, self.total_loss_heat, self.total_loss_ll

   def get_lastlayer(self):
       return self.PAF[-1],  self.CPM[-1]

2 基础网络层的定义

  • 这里附上了有 卷积层、池化层、合并层、残差层、上采样层。其中后两种该网络是没有使用到的。(比较简单,不做详细描述)
  • 值得注意的是:
    (1) 不同的初始化会产生不同的训练情况。在openpose网络中,没有使用 BN,所以需要设置 偏置bias。偏置初始话的选择:当tf.constant_initializer(0.0)的使用,延迟了训练时网络的收敛的时间;当 tf.contrib.layers.xavier_initializer(),可提升训练的速度和效果
    (2) tf.nn.leaky_relu 的使用,延迟了网络的收敛。所以还是选择tf.nn.relu。

import tensorflow as tf

init_weights = [tf.random_normal_initializer(stddev=0.01),
                              tf.contrib.layers.xavier_initializer()]

init_weight = init_weights[1]


def convolutional(input_data, output_C, kernel_size, stride_size, trainable, name, activate=True, bn=False):

   with tf.variable_scope(name):
       if stride_size[0] != 1 or stride_size[1] != 1:
           pad_h, pad_w = (kernel_size[0] - 2) // 2 + 1, (kernel_size[1] - 2) // 2 + 1
           paddings = tf.constant([[0, 0], [pad_h, pad_h], [pad_w, pad_w], [0, 0]])
           input_data = tf.pad(input_data, paddings, 'CONSTANT')
           strides = (1, stride_size[0], stride_size[1], 1)
           padding = 'VALID'
       else:
           strides = (1, 1, 1, 1)
           padding = "SAME"

       input_C = int(input_data.get_shape()[-1])
       filters_shape = (kernel_size[0], kernel_size[1], input_C, output_C)
       weight = tf.get_variable(name='weight', dtype=tf.float32, trainable=trainable,
                                shape=filters_shape, initializer=init_weight)
       conv = tf.nn.conv2d(input=input_data, filter=weight, strides=strides, padding=padding)

       if bn:
           conv = tf.layers.batch_normalization(conv, beta_initializer=tf.zeros_initializer(),
                                                gamma_initializer=tf.ones_initializer(),
                                                moving_mean_initializer=tf.zeros_initializer(),
                                                moving_variance_initializer=tf.ones_initializer(), training=trainable,
                                                name = "bn")
       else:
           bias = tf.get_variable(name='bias', shape=filters_shape[-1], trainable=trainable,
                                  dtype=tf.float32, initializer=init_weight)
           conv = tf.nn.bias_add(conv, bias)
           
       # if activate == True: conv = tf.nn.leaky_relu(conv, alpha=0.1)
       if activate == True: conv = tf.nn.relu(conv)

   return conv



def residual_block(input_data, filter_num1, filter_num2, trainable, name):
   short_cut = input_data
   with tf.variable_scope(name):
       input_data = convolutional(input_data, filter_num1, (1, 1), (1, 1), trainable=trainable, name='conv1')
       input_data = convolutional(input_data, filter_num2, (3, 3), (1, 1), trainable=trainable, name='conv2')
       residual_output = input_data + short_cut
   return residual_output

def route(input_layer, name):
   with tf.variable_scope(name):
       output = tf.concat(input_layer, axis=-1)
   return output


def upsample(input_data, name, method="deconv"):
   assert method in ["resize", "deconv"]
   if method == "resize":
       with tf.variable_scope(name):
           input_shape = tf.shape(input_data)
           output = tf.image.resize_nearest_neighbor(input_data, (input_shape[1] * 2, input_shape[2] * 2))
       return output
   if method == "deconv":
       # replace resize_nearest_neighbor with conv2d_transpose To support TensorRT optimization
       numm_filter = input_data.shape.as_list()[-1]
       output = tf.layers.conv2d_transpose(input_data, numm_filter, kernel_size=2, padding='same',
                                           strides=(2 ,2), kernel_initializer=init_weight)
       return output


def max_pool(input, kernel_size, stride_size, name, padding=None):
   return tf.nn.max_pool(input,
                         ksize=[1, kernel_size[0], kernel_size[1], 1],
                         strides=[1, stride_size[0], stride_size[1], 1],
                         padding=padding,
                         name=name)

你可能感兴趣的:(openpose系列)