PFLD Paper Analysis and Reproduction (2): Reproducing the Paper
Preface:
Over the past two days I happened to resolve some issues from my earlier article on MobileNet-SSD object detection, so while the training machine is busy with the MobileNet-SSD model, I will use the spare time to write up my reproduction of "PFLD: A Practical Facial Landmark Detector", the paper I posted about and reproduced a while ago.
The previous post, https://blog.csdn.net/Danbinbo/article/details/96718937, analyzed and summarized the core ideas of the PFLD paper; this one shares my own implementation. In short, PFLD is a model example of a practical facial landmark detector, and the design of the loss function is the core element of the whole network; the paper achieves three goals: high accuracy, fast speed, and a small model. Paper link: https://arxiv.org/pdf/1902.10859.pdf
Facial landmark detection: the deep learning framework used here is TensorFlow. The backbone network structure is given directly below. The Euler angles from the paper are computed from the landmark coordinates of the face training data (a sketch of that computation follows the helper functions further down).
# Backbone network + auxiliary network
def Pfld_Netework(input):  # input: 112 * 112 * 3
    with tf.name_scope('Pfld_Netework'):
        ##### Part 1: Major Network (backbone) #####
        # layers1
        # input = [None, 112, 112, 3]
        with tf.name_scope('layers1'):
            W_conv1 = weight_variable([3, 3, 3, 64], name='W_conv1')
            b_conv1 = bias_variable([64], name='b_conv1')
            x_image = tf.reshape(input, [-1, 112, 112, 3], name='input_X')
            x_image = batch_norm(x_image, is_training=True)
            h_conv_1 = conv2d(x_image, W_conv1, strides=[1, 2, 2, 1], padding='SAME') + b_conv1  # 56 * 56 * 64
        # layers2
        with tf.name_scope('layers2'):
            # depthwise filter shape: [h, w, in_channels, channel_multiplier]
            W_conv2 = weight_variable([3, 3, 64, 1], name='W_conv2')
            b_conv2 = bias_variable([64], name='b_conv2')
            h_conv_1 = batch_norm(h_conv_1, is_training=True)
            h_conv_2 = deepwise_conv2d(h_conv_1, W_conv2) + b_conv2  # 56 * 56 * 64
        # Bottleneck blocks, input = [56*56*64]
        with tf.name_scope('Mobilenet-V2'):
            with tf.name_scope('bottleneck_1'):
                h_conv_b1 = make_bottleneck_block(h_conv_2, 2, 64, stride=[1, 2, 2, 1], kernel=(3, 3))   # 28*28*64
                h_conv_b1 = make_bottleneck_block(h_conv_b1, 2, 64, stride=[1, 1, 1, 1], kernel=(3, 3))  # 28*28*64
                h_conv_b1 = make_bottleneck_block(h_conv_b1, 2, 64, stride=[1, 1, 1, 1], kernel=(3, 3))  # 28*28*64
                h_conv_b1 = make_bottleneck_block(h_conv_b1, 2, 64, stride=[1, 1, 1, 1], kernel=(3, 3))  # 28*28*64
                h_conv_b1 = make_bottleneck_block(h_conv_b1, 2, 64, stride=[1, 1, 1, 1], kernel=(3, 3))  # 28*28*64
            with tf.name_scope('bottleneck_2'):
                h_conv_b2 = make_bottleneck_block(h_conv_b1, 2, 128, stride=[1, 2, 2, 1], kernel=(3, 3))  # 14*14*128
            with tf.name_scope('bottleneck_3'):
                h_conv_b3 = make_bottleneck_block(h_conv_b2, 4, 128, stride=[1, 1, 1, 1], kernel=(3, 3))  # 14*14*128
                h_conv_b3 = make_bottleneck_block(h_conv_b3, 4, 128, stride=[1, 1, 1, 1], kernel=(3, 3))  # 14*14*128
                h_conv_b3 = make_bottleneck_block(h_conv_b3, 4, 128, stride=[1, 1, 1, 1], kernel=(3, 3))  # 14*14*128
                h_conv_b3 = make_bottleneck_block(h_conv_b3, 4, 128, stride=[1, 1, 1, 1], kernel=(3, 3))  # 14*14*128
                h_conv_b3 = make_bottleneck_block(h_conv_b3, 4, 128, stride=[1, 1, 1, 1], kernel=(3, 3))  # 14*14*128
                h_conv_b3 = make_bottleneck_block(h_conv_b3, 4, 128, stride=[1, 1, 1, 1], kernel=(3, 3))  # 14*14*128
            with tf.name_scope('bottleneck_4'):
                h_conv_b4 = make_bottleneck_block(h_conv_b3, 2, 16, stride=[1, 1, 1, 1], kernel=(3, 3))  # 14*14*16
        # S1
        with tf.name_scope('S1'):
            h_conv_s1 = h_conv_b4  # 14 * 14 * 16
        # S2
        with tf.name_scope('S2'):
            W_conv_s2 = weight_variable([3, 3, 16, 32], name='W_conv_s2')
            b_conv_s2 = bias_variable([32], name='b_conv_s2')
            h_conv_s1 = batch_norm(h_conv_s1, is_training=True)
            h_conv_s2 = conv2d(h_conv_s1, W_conv_s2, strides=[1, 2, 2, 1], padding='SAME') + b_conv_s2  # 7*7*32
        # S3
        with tf.name_scope('S3'):
            W_conv_s3 = weight_variable([7, 7, 32, 128], name='W_conv_s3')
            b_conv_s3 = bias_variable([128], name='b_conv_s3')
            h_conv_s2 = batch_norm(h_conv_s2, is_training=True)
            h_conv_s3 = conv2d(h_conv_s2, W_conv_s3, strides=[1, 1, 1, 1], padding='VALID') + b_conv_s3  # 1 * 1 * 128
        # MS-FC (multi-scale fully connected layer)
        with tf.name_scope('MS-FC'):
            ######################## Variant 1 -- converges fastest #################################
            W_conv_fc_s1 = weight_variable([14, 14, 16, 64], name='W_conv_fc_s1')
            b_conv_fc_s1 = bias_variable([64], name='b_conv_fc_s1')
            h_conv_s1 = batch_norm(h_conv_s1, is_training=True)
            h_conv_fc_s1 = conv2d(h_conv_s1, W_conv_fc_s1, strides=[1, 1, 1, 1], padding='VALID') + b_conv_fc_s1  # 1*1*64
            W_conv_fc_s2 = weight_variable([7, 7, 32, 64], name='W_conv_fc_s2')
            b_conv_fc_s2 = bias_variable([64], name='b_conv_fc_s2')
            h_conv_s2 = batch_norm(h_conv_s2, is_training=True)
            h_conv_fc_s2 = conv2d(h_conv_s2, W_conv_fc_s2, strides=[1, 1, 1, 1], padding='VALID') + b_conv_fc_s2  # 1*1*64
            h_conv_s3 = batch_norm(h_conv_s3, is_training=True)
            h_conv_ms_fc = tf.concat([h_conv_fc_s1, h_conv_fc_s2, h_conv_s3], axis=3)  # 1*1*256
            h_conv_ms_fc = tf.reshape(h_conv_ms_fc, (-1, 1 * 1 * 256))
            W_ms_fc = weight_variable([1 * 1 * 256, 136], name='W_ms_fc')
            b_ms_fc = bias_variable([136], name='b_ms_fc')
            pre_landmark = tf.add(tf.matmul(h_conv_ms_fc, W_ms_fc), b_ms_fc, name='landmark_3')
            ######################### Variant 2 -- flatten and concat; worked poorly ################################
            """
            W_conv_fc_s1 = tf.reshape(h_conv_s1, [-1, 14 * 14 * 16])
            W_conv_fc_s2 = tf.reshape(h_conv_s2, [-1, 7 * 7 * 32])
            W_conv_fc_s3 = tf.reshape(h_conv_s3, [-1, 1 * 1 * 128])
            h_conv_ms_fc = tf.concat([W_conv_fc_s1, W_conv_fc_s2, W_conv_fc_s3], axis=1)
            h_conv_ms_fc1 = tf.reshape(h_conv_ms_fc, (-1, 1 * 1 * 4832))
            W_ms_fc = weight_variable([1 * 1 * 4832, 136], name='W_ms_fc')
            b_ms_fc = bias_variable([136], name='b_ms_fc')
            h_conv_ms_fc1 = batch_norm(h_conv_ms_fc1, is_training=True)
            pre_landmark = tf.add(tf.matmul(h_conv_ms_fc1, W_ms_fc), b_ms_fc, name='landmark_3')
            """
            ######################### Variant 3 -- crop and concat; used initially ################################
            """
            concat1 = crop_and_concat(h_conv_s1, h_conv_s2)  # (?, 7, 7, 48)
            concat2 = crop_and_concat(concat1, h_conv_s3)    # (?, 1, 1, 176)
            h_conv_ms_fc = tf.reshape(concat2, [-1, 1 * 1 * 176])
            W_ms_fc = weight_variable([1 * 1 * 176, 136], name='W_ms_fc')
            b_ms_fc = bias_variable([136], name='b_ms_fc')
            pre_landmark = tf.add(tf.matmul(h_conv_ms_fc, W_ms_fc), b_ms_fc, name='landmark_3')
            """
        ##### Part 2: Auxiliary Network #####
        # layers1
        # auxiliary network input: h_conv_b1 == [None, 28, 28, 64]
        with tf.name_scope('Funet-layers1'):
            W_convfu_1 = weight_variable([3, 3, 64, 128], name='W_convfu_1')
            b_convfu_1 = bias_variable([128], name='b_convfu_1')
            h_convfu_1 = conv2d(h_conv_b1, W_convfu_1, strides=[1, 2, 2, 1], padding='SAME') + b_convfu_1  # 14 * 14 * 128
        # layers2
        with tf.name_scope('Funet-layers2'):
            W_convfu_2 = weight_variable([3, 3, 128, 128], name='W_convfu_2')
            b_convfu_2 = bias_variable([128], name='b_convfu_2')
            h_convfu_2 = conv2d(h_convfu_1, W_convfu_2, strides=[1, 1, 1, 1], padding='SAME') + b_convfu_2  # 14 * 14 * 128
        # layers3
        with tf.name_scope('Funet-layers3'):
            W_convfu_3 = weight_variable([3, 3, 128, 32], name='W_convfu_3')
            b_convfu_3 = bias_variable([32], name='b_convfu_3')
            h_convfu_3 = conv2d(h_convfu_2, W_convfu_3, strides=[1, 2, 2, 1], padding='SAME') + b_convfu_3  # 7 * 7 * 32
        # layers4
        with tf.name_scope('Funet-layers4'):
            W_convfu_4 = weight_variable([7, 7, 32, 128], name='W_convfu_4')
            b_convfu_4 = bias_variable([128], name='b_convfu_4')
            h_convfu_4 = conv2d(h_convfu_3, W_convfu_4, strides=[1, 1, 1, 1], padding='VALID') + b_convfu_4  # 1 * 1 * 128
        ####### FC layers ######
        # Fc1:
        with tf.name_scope('Fc1'):
            W_fu_fc1 = weight_variable([1 * 1 * 128, 32], name='W_fu_fc1')
            b_fc_s1 = bias_variable([32], name='b_fc_s1')
            h_convfu_4_fc = tf.reshape(h_convfu_4, [-1, 1 * 1 * 128])  # 1 * 128
            pre_theat_s1 = tf.matmul(h_convfu_4_fc, W_fu_fc1) + b_fc_s1  # 1 * 32
        # Fc2:
        with tf.name_scope('Fc2'):
            W_fu_fc2 = weight_variable([1 * 1 * 32, 3], name='W_fu_fc2')
            b_fc_s2 = bias_variable([3], name='b_fc_s2')
            h_convfu_5_fc = tf.reshape(pre_theat_s1, [-1, 1 * 1 * 32])  # 1 * 32
            pre_theat = tf.add(tf.matmul(h_convfu_5_fc, W_fu_fc2), b_fc_s2, name='pre_theta')  # 1 * 3 Euler angles
    return pre_landmark, pre_theat
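As a quick sanity check of the graph above (a minimal sketch of my own; the placeholder and print statements are not part of the training code), building the network once confirms the two output shapes, assuming the helper functions listed below are already defined in the same module:

inputs = tf.placeholder(tf.float32, [None, 112, 112, 3])
pre_landmark, pre_theat = Pfld_Netework(inputs)
print(pre_landmark.get_shape().as_list())  # [None, 136] -> 68 (x, y) pairs
print(pre_theat.get_shape().as_list())     # [None, 3]   -> three Euler angles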
Some of the helper functions used by the backbone are listed here:
# -*- coding: utf-8 -*-
"""
@author: Danbin
@time: 2019.4.1
"""
import glob
import os

import cv2
import h5py
import numpy as np
import tensorflow as tf

slim = tf.contrib.slim


def read_all_path(path):
    """Collect the annotation paths for all images matching a glob pattern.
    :param path: glob pattern of the images
    :return: list of .pts annotation paths
    """
    mark_path = []
    for filename in glob.glob(path):
        f, ext = os.path.splitext(filename)
        mark = f + '.pts'  # landmark annotation file next to the image
        mark_path.append(mark)
    return mark_path


def readLmk(fileName):
    """Read the key points from an annotation file.
    :param fileName: full path of the .pts annotation file
    :return: list of (x, y) landmark tuples
    """
    landmarks = []
    if not os.path.exists(fileName):
        return landmarks
    with open(fileName) as fp:
        for i, line in enumerate(fp.readlines()):
            TT = line.strip("\n")
            if 2 < i <= 70:  # lines 3..70 hold the 68 landmark coordinates
                TT_temp = TT.split(" ")
                x = float(TT_temp[0])
                y = float(TT_temp[1])
                landmarks.append((x, y))
    return landmarks


def get_data_and_label(path):
    """Read the image and landmarks for the first file matching the pattern.
    :param path: glob pattern
    :return: Image, land_mark reshaped to [1, 136]
    """
    for filename in glob.glob(path):
        Image = cv2.imread(filename)
        f, ext = os.path.splitext(filename)
        mark_path = f + '.pts'  # landmark annotation file
        landmarks = np.array(readLmk(mark_path))
        landmark = np.reshape(landmarks, [1, 136])  # 68 (x, y) pairs -> 136 values
        return Image, landmark  # note: returns on the first match


def showlandmark(image_path, image_label):
    """Draw the annotated landmarks on the image to check that annotation
    and image are consistent.
    :param image_path: image path
    :param image_label: annotation (.pts) path
    """
    img = cv2.imread(image_path, 1)
    labelmarks = readLmk(image_label)
    print('number of key points:', len(labelmarks))
    for point in labelmarks:
        print('point:', point)
        # cv2.circle expects integer pixel coordinates
        cv2.circle(img, (int(point[0]), int(point[1])), 1, color=(16, 255, 10))
    cv2.imwrite('000.jpg', img)
    cv2.imshow('image_109', img)
    cv2.waitKey(0)


def weight_variable(shape, name):
    """Initialize a weight variable.
    :param shape: w_shape [h, w, C1, C2]
    :return: W
    """
    initial = tf.truncated_normal(shape, stddev=0.01)
    return tf.Variable(initial, name=name)


def bias_variable(shape, name):
    """Initialize a bias variable.
    :param shape: b_shape [C2]
    :return: b
    """
    initial = tf.constant(0.01, shape=shape)
    return tf.Variable(initial, name=name)


def conv2d(x, W, padding, strides=[1, 2, 2, 1]):
    """Standard 2-D convolution.
    :param x: input
    :param W: filter weights
    :param padding: padding mode ('SAME' or 'VALID')
    :param strides: strides
    :return: convolved tensor
    """
    return tf.nn.conv2d(x, W, strides=strides, padding=padding)


def deepwise_conv2d(x, W):
    """Depthwise convolution.
    :param x: input
    :param W: filter of shape [h, w, in_channels, channel_multiplier]
    :return: convolved tensor
    """
    return tf.nn.depthwise_conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
    """2x2 max pooling.
    :param x: input
    :return: pooled tensor
    """
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='VALID')


def batch_norm(value, is_training=True):
    '''
    Batch normalization; returns the normalized tensor.
    value: the input; the first dimension is batch_size.
    is_training: True means training mode, where the running mean and
    variance of the training set keep being updated (via an exponential
    moving average). At test time set it to False so the statistics
    accumulated during training are used instead of being updated.
    '''
    return tf.layers.batch_normalization(value, training=is_training)


def make_bottleneck_block(inputs, expantion, depth, stride, kernel=(3, 3)):
    """Construct a bottleneck block from a block definition.
    There are three parts in a bottleneck block:
    1. 1x1 pointwise convolution with ReLU6, expanding channels to 'input_channel * expantion'
    2. 3x3 depthwise convolution with ReLU6
    3. 1x1 pointwise (linear) convolution
    """
    # The depth of the block depends on the input depth and the expansion rate.
    input_depth = inputs.get_shape().as_list()[-1]  # input channels
    block_depth = input_depth * expantion           # expanded channels
    # First: 1x1 pointwise convolution, ReLU6
    inputs = batch_norm(inputs, is_training=True)
    block1 = tf.layers.conv2d(inputs=inputs, filters=block_depth, kernel_size=[1, 1],
                              padding='same', activation=tf.nn.relu6)
    # Second: 3x3 depthwise convolution, ReLU6
    # depthwise filter shape: (3, 3, block_depth, 1)
    depthwise_kernel = tf.Variable(
        tf.truncated_normal(shape=[kernel[0], kernel[1], block_depth, 1], stddev=0.001))
    block2 = tf.nn.depthwise_conv2d_native(input=block1, filter=depthwise_kernel,
                                           strides=stride, padding='SAME')
    block2 = tf.nn.relu6(features=block2)
    # Third: 1x1 pointwise convolution (no activation).
    block3 = tf.layers.conv2d(inputs=block2, filters=depth, kernel_size=[1, 1], padding='SAME')
    if stride[1] == 1:
        # Residual connection; project the input with a 1x1 convolution
        # when the channel counts differ.
        if depth != input_depth:
            shortcut = tf.layers.conv2d(inputs, depth, 1, 1)
        else:
            shortcut = inputs
        block3 = tf.add_n([block3, shortcut])
    return block3


def subsample(inputs, factor, scope=None):
    '''Spatial down-sampling.
    factor == 1: return the inputs unchanged;
    otherwise: down-sample with slim.max_pool2d at the given stride.'''
    if factor == 1:
        return inputs
    return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)


def bottleneck(inputs, expantion, depth, stride, kernel=(3, 3)):
    """
    :param inputs: input tensor
    :param expantion: expansion factor
    :param depth: output channels
    :param stride: stride
    :param kernel: kernel size
    :return: output tensor
    """
    input_depth = inputs.get_shape().as_list()[-1]  # input channels
    # batch-normalize the input
    inputs = slim.batch_norm(inputs, activation_fn=tf.nn.relu,
                             scope='inputs')
    if depth == input_depth:
        # Same input/output channels: use subsample to down-sample the
        # input spatially by the stride to form the shortcut.
        shortcut = subsample(inputs, stride[1], 'shortcut')
    else:
        # Different channel counts: a strided 1x1 convolution changes the
        # input channels so they match the output channels.
        shortcut = slim.conv2d(inputs, depth, [1, 1], stride=stride[1],
                               normalizer_fn=None, activation_fn=None,
                               scope='shortcut')
    # Residual branch:
    # step 1: 1x1 convolution, stride 1, input_depth output channels
    # step 2: depthwise convolution with the given kernel and stride
    # step 3: 1x1 convolution, stride 1, depth output channels
    block1 = tf.layers.conv2d(inputs=inputs, filters=input_depth, kernel_size=[1, 1],
                              padding='same', activation=tf.nn.relu6)
    depthwise_kernel = tf.Variable(
        tf.truncated_normal(shape=[kernel[0], kernel[1], input_depth, expantion], stddev=0.01))
    block2 = tf.nn.depthwise_conv2d_native(input=block1, filter=depthwise_kernel,
                                           strides=stride, padding='SAME')
    block2 = tf.nn.relu6(features=block2)
    block3 = tf.layers.conv2d(inputs=block2, filters=depth, kernel_size=[1, 1], padding='SAME')
    output = shortcut + block3
    return output


def get_train_and_label(train_path):
    """Load the training images, landmarks and Euler angles from HDF5."""
    with h5py.File(train_path, 'r') as f:
        images = f['Images'][:]
        labels = f['landmarks'][:]
        oulaTheta = f['oulaTheta'][:]
    X_train = images      # (N, 112, 112, 3)
    Y_train = labels      # (N, 136)
    Y_theta = oulaTheta   # (N, 3)
    return X_train, Y_train, Y_theta


def crop_and_concat(x1, x2):
    """Center-crop x1 to the spatial size of x2, then concatenate the two
    along the channel axis."""
    x1_shape = tf.shape(x1)
    x2_shape = tf.shape(x2)
    # offsets for the top-left corner of the crop:
    # (x1 - x2) // 2 keeps the central region
    begin = [0, (x1_shape[1] - x2_shape[1]) // 2, (x1_shape[2] - x2_shape[2]) // 2, 0]
    size = [-1, x2_shape[1], x2_shape[2], -1]
    # tf.slice: begin[i] is the start offset in dimension i of x1,
    # size[i] is the number of elements to take from that dimension
    # (-1 means "all remaining").
    x1_crop = tf.slice(x1, begin, size)
    return tf.concat([x1_crop, x2], axis=3)
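As mentioned at the top, the Euler angles fed to the auxiliary network are computed from the landmark coordinates of the training data. I won't spell out my exact pipeline here, but one common way to do it is a PnP solve between a generic 3D face model and six of the 68 2D landmarks, followed by a rotation-matrix decomposition. The sketch below is under my own assumptions (the generic 3D reference points, the rough focal-length guess and the zero-distortion camera are all placeholders), not a recipe prescribed by the paper:

import cv2
import numpy as np

# Generic 3D reference points for six of the 68 landmarks (assumed model,
# in arbitrary units): nose tip, chin, eye outer corners, mouth corners.
MODEL_POINTS_3D = np.array([
    (0.0, 0.0, 0.0),           # nose tip       (landmark 30)
    (0.0, -330.0, -65.0),      # chin           (landmark 8)
    (-225.0, 170.0, -135.0),   # left eye left  (landmark 36)
    (225.0, 170.0, -135.0),    # right eye right(landmark 45)
    (-150.0, -150.0, -125.0),  # mouth left     (landmark 48)
    (150.0, -150.0, -125.0),   # mouth right    (landmark 54)
], dtype=np.float64)

def estimate_euler_angles(landmarks, img_w, img_h):
    """landmarks: (68, 2) array of 2D points; returns pitch, yaw, roll in degrees."""
    image_points = np.array([landmarks[30], landmarks[8], landmarks[36],
                             landmarks[45], landmarks[48], landmarks[54]],
                            dtype=np.float64)
    focal = img_w  # rough focal-length assumption
    camera_matrix = np.array([[focal, 0, img_w / 2.0],
                              [0, focal, img_h / 2.0],
                              [0, 0, 1]], dtype=np.float64)
    dist_coeffs = np.zeros((4, 1))  # assume no lens distortion
    ok, rvec, tvec = cv2.solvePnP(MODEL_POINTS_3D, image_points,
                                  camera_matrix, dist_coeffs)
    rmat, _ = cv2.Rodrigues(rvec)  # rotation vector -> rotation matrix
    # decompose the rotation matrix into Euler angles (ZYX convention)
    sy = np.sqrt(rmat[0, 0] ** 2 + rmat[1, 0] ** 2)
    pitch = np.degrees(np.arctan2(rmat[2, 1], rmat[2, 2]))
    yaw = np.degrees(np.arctan2(-rmat[2, 0], sy))
    roll = np.degrees(np.arctan2(rmat[1, 0], rmat[0, 0]))
    return pitch, yaw, roll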
The core of the paper, the design of the loss function, and the TensorFlow training code are not published here for now; if you really need them, you can ask me privately. One note on the training data format: it is HDF5, because I trained not only with TensorFlow but also did a lot of training and testing in Caffe, and HDF5 is a data format that both Caffe and TF can read, so the same files serve both.
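That said, readers who want a starting point can follow the loss formula analyzed in the previous post: the per-sample squared L2 landmark error weighted by the Euler-angle term sum_k (1 - cos theta_k). The sketch below is my simplified stand-in (it drops the per-attribute sample weights omega from the paper), not the withheld training code:

def pfld_loss(landmark_gt, landmark_pred, theta_gt, theta_pred):
    # angle penalty: sum over the three Euler angles of (1 - cos(delta))
    angle_weight = tf.reduce_sum(1.0 - tf.cos(theta_gt - theta_pred), axis=1)
    # per-sample squared L2 distance over all 136 landmark coordinates
    l2_dist = tf.reduce_sum(tf.square(landmark_gt - landmark_pred), axis=1)
    return tf.reduce_mean(angle_weight * l2_dist)

As for the HDF5 layout, get_train_and_label above already fixes the three dataset names; a minimal writer sketch (the array shapes are inferred from the reader, the function name is mine):

def write_train_h5(path, images, landmarks, thetas):
    # images: (N, 112, 112, 3), landmarks: (N, 136), thetas: (N, 3)
    with h5py.File(path, 'w') as f:
        f.create_dataset('Images', data=images)
        f.create_dataset('landmarks', data=landmarks)
        f.create_dataset('oulaTheta', data=thetas)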
Finally, the trained model and the test results are shared here as well:
I just discovered that the model file cannot be uploaded directly in this post, so I will upload it to my download resources and document the names of the input and output tensors; by loading the model and addressing those tensor names, you can run it on your own test set.
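Once you have the model file, inference only needs the graph and those tensor names. Below is a minimal loading sketch; the file and image names are hypothetical, and the tensor names are only my guess derived from the name scopes in the code above ('Pfld_Netework/layers1/input_X' and 'Pfld_Netework/MS-FC/landmark_3'), so replace them with the names published alongside the model and replicate whatever preprocessing was used during training:

import cv2
import numpy as np
import tensorflow as tf

graph_def = tf.GraphDef()
with tf.gfile.GFile('pfld_frozen.pb', 'rb') as f:  # hypothetical file name
    graph_def.ParseFromString(f.read())
with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name='')

img = cv2.imread('test_face.jpg')  # hypothetical test image
img = cv2.resize(img, (112, 112)).astype(np.float32)[None, ...]

with tf.Session(graph=graph) as sess:
    landmark = sess.run('Pfld_Netework/MS-FC/landmark_3:0',
                        feed_dict={'Pfld_Netework/layers1/input_X:0': img})
points = landmark.reshape(-1, 2)  # 68 (x, y) pairs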