- 卷积神经网络使用caffe框架
- 实现方案参考《Deep Convolutional Network Cascade for Facial Point Detection》论文
原理:如下图所示,输入为raw image,输出为5个关键点的坐标,属于回归任务,因此最后一层设置为线性回归(linear regression),也就是全连接层即可,损失函数为二范数(欧几里得损失层)。论文中声明使用unshared weight的卷积核效果会好些,由于caffe中还没有实现此功能,我直接用shared weight。其实,若采用unshared weight,精度可能顶多高两三个百分点,因为汤晓鸥(Tang Xiaoou)在一篇论文中对这两种形式做过比较。在prototxt设置中,除了没有使用unshared weight,其他都如文中所述,但是训练过程中loss出现了NaN,无法收敛,把学习率调到极低也没有用。然后我把论文中所说的tanh层和abs层去掉,直接用relu,实验的效果不错。由于只是做实验,我只实现了level 1中CNN F1这一步。
-------由于输出是多个坐标,是multi-label,因此采用hdf5 layer, 保存数据为hdf5格式如下所示:
import os
import numpy as np
import h5py
import cv2
import math
# Build the HDF5 training set for the facial-point network.
#
# Every non-empty line of the annotation list must hold 15 whitespace-separated
# fields: the image path, the face box as "left right top bottom", and five
# landmarks as "x1 y1 ... x5 y5".  Each face is cropped, converted to
# grayscale, resized to the network input size and scaled to [0, 1]; each
# landmark is expressed relative to the face-box centre and divided by half the
# box size, so valid labels lie in [-1, 1].
num_cols = 1    # number of channels (grayscale)
height = 39     # image height fed to the network
width = 39      # image width fed to the network
labeldim = 10   # 5 landmarks * (x, y)
num_fields = 5 + labeldim  # image path + 4 box values + landmark coordinates

# Location of the images and of the annotation list
dirname = 'C:\\Users\\liang\\Desktop\\train\\'
filename = 'C:\\Users\\liang\\Desktop\\train\\trainImageList.txt'

# Read the whole list first so the arrays are sized from the real sample count
# (no overflow, no zero-filled trailing rows) and the file handle is closed.
with open(filename) as list_file:
    records = [entry.split() for entry in list_file if entry.strip()]

num_rows = len(records)  # number of samples
total_size = num_cols * num_rows * height * width
data = np.zeros((num_rows, num_cols, height, width), dtype='float32')
label = np.zeros((num_rows, labeldim), dtype='float32')

for i, content in enumerate(records):
    print(i)
    if len(content) != num_fields:
        raise ValueError('record %d of %s: expected %d fields, got %d'
                         % (i, filename, num_fields, len(content)))

    # The box is floored to integers because it is used for array slicing;
    # the landmarks keep their sub-pixel precision.
    left, right, top, bottom = [int(math.floor(float(v))) for v in content[1:5]]
    facepoint = [float(v) for v in content[5:]]

    imgname = dirname + content[0]
    img = cv2.imread(imgname)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('cannot read image: ' + imgname)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Rows are indexed by y (top/bottom), columns by x (left/right).
    # NOTE(review): the slice excludes the right/bottom pixel although the box
    # size below counts it (+1) - confirm which convention the list uses.
    face = img[top:bottom, left:right]
    if face.size == 0:
        raise ValueError('empty face box in ' + imgname)
    # cv2.resize expects the target size as (width, height).
    face = cv2.resize(face, (width, height))
    data[i, 0, :, :] = face.astype('float32') * (1. / 255)

    # Width comes from the x extent and height from the y extent.
    facewidth = right - left + 1
    faceheight = bottom - top + 1
    center_x = left + facewidth // 2
    center_y = top + faceheight // 2

    # Even positions hold x coordinates, odd positions hold y coordinates.
    facepoint[0::2] = [(x - center_x) / (facewidth / 2.0) for x in facepoint[0::2]]
    facepoint[1::2] = [(y - center_y) / (faceheight / 2.0) for y in facepoint[1::2]]
    # A landmark outside [-1, 1] lies outside the face box: reject the sample
    # explicitly (an assert would be stripped under "python -O").
    if not all(-1 <= v <= 1 for v in facepoint):
        raise ValueError('landmark outside the face box in ' + imgname)
    label[i, :] = facepoint

# Store the arrays under the dataset names "data" and "label".
with h5py.File(os.getcwd() + '/train_data.h5', 'w') as f:
    f['data'] = data
    f['label'] = label
# Write a text file listing the path of the HDF5 file.
with open(os.getcwd() + '/train_data_list.txt', 'w') as f:
    f.write(os.getcwd() + '/train_data.h5\n')
---solver:
# The network definition (protocol buffer) used for training and testing
net: "facialpoint_train_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of facialpoint, we have test batch size 80 and 43 test iterations,
# i.e. 80 * 43 = 3,440 images are evaluated per test pass (not 10,000).
# NOTE(review): the data-preparation notes mention 3,466 validation images -
# confirm test_iter against the actual validation set size.
test_iter: 43
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 1e-3
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy.
# "inv" decays the rate as base_lr * (1 + gamma * iter) ^ (-power).
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 100000
# Snapshot intermediate results every 20000 iterations;
# snapshot file names start with "facialpoint".
snapshot: 20000
snapshot_prefix: "facialpoint"
# solver mode: CPU or GPU
solver_mode: GPU
---prototxt:
# Train/test network for facial-point regression.
name: "facialpoint"
# TRAIN-phase input: produces the "data" and "label" blobs from the HDF5
# files listed in train_data_list.txt, 100 samples per batch.
layers {
name: "MyData"
type: HDF5_DATA
top: "data"
top: "label"
hdf5_data_param {
source: "train_data_list.txt"
batch_size: 100
}
include: { phase: TRAIN }
}
# TEST-phase input: same blobs, read from the files listed in
# test_data_list.txt, 80 samples per batch.  It shares the layer name with the
# TRAIN-phase layer; only one of the two is included in a given phase.
layers {
name: "MyData"
type: HDF5_DATA
top: "data"
top: "label"
hdf5_data_param {
source: "test_data_list.txt"
batch_size: 80
}
include: { phase: TEST }
}
# Convolutional feature extractor: four convolutions, each followed by an
# in-place ReLU, with 2x2 max pooling after the first three.
# The two blobs_lr entries are the learning-rate multipliers of the weights (1)
# and of the bias (2).
# Shape notes below assume a 1x39x39 input and no padding - TODO confirm.
# conv1: 20 filters, 4x4, stride 1 (39x39 -> 36x36).
layers {
name: "conv1"
type: CONVOLUTION
bottom: "data"
top: "conv1"
blobs_lr: 1
blobs_lr: 2
convolution_param {
num_output: 20
kernel_size: 4
stride: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
# In-place ReLU on conv1.
layers {
name: "relu1"
type: RELU
bottom: "conv1"
top: "conv1"
}
# pool1: 2x2 max pooling, stride 2 (36x36 -> 18x18).
layers {
name: "pool1"
type: POOLING
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
# conv2: 40 filters, 3x3, stride 1 (18x18 -> 16x16).
layers {
name: "conv2"
type: CONVOLUTION
bottom: "pool1"
top: "conv2"
blobs_lr: 1
blobs_lr: 2
convolution_param {
num_output: 40
kernel_size: 3
stride: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
# In-place ReLU on conv2.
layers {
name: "relu2"
type: RELU
bottom: "conv2"
top: "conv2"
}
# pool2: 2x2 max pooling, stride 2 (16x16 -> 8x8).
layers {
name: "pool2"
type: POOLING
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
# conv3: 60 filters, 3x3, stride 1 (8x8 -> 6x6).
layers {
name: "conv3"
type: CONVOLUTION
bottom: "pool2"
top: "conv3"
blobs_lr: 1
blobs_lr: 2
convolution_param {
num_output: 60
kernel_size: 3
stride: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
# In-place ReLU on conv3.
layers {
name: "relu3"
type: RELU
bottom: "conv3"
top: "conv3"
}
# pool3: 2x2 max pooling, stride 2 (6x6 -> 3x3).
layers {
name: "pool3"
type: POOLING
bottom: "conv3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
# conv4: 80 filters, 2x2, stride 1 (3x3 -> 2x2); no pooling follows.
layers {
name: "conv4"
type: CONVOLUTION
bottom: "pool3"
top: "conv4"
blobs_lr: 1
blobs_lr: 2
convolution_param {
num_output: 80
kernel_size: 2
stride: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
# In-place ReLU on conv4.
layers {
name: "relu4"
type: RELU
bottom: "conv4"
top: "conv4"
}
# Regression head: fc5 (120 units) -> ReLU -> fc6 (10 outputs) -> Euclidean loss.
# The two blobs_lr / weight_decay entries apply to the weights and to the bias
# respectively, so the bias is not decayed.
layers {
  name: "fc5"
  type: INNER_PRODUCT
  bottom: "conv4"
  top: "fc5"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 120
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# Non-linearity between the two fully connected layers.  Without it fc5 and fc6
# collapse into a single linear map, and the deploy definition of this model
# applies a ReLU to fc5 as well, so training must match it.
layers {
  name: "relu5"
  type: RELU
  bottom: "fc5"
  top: "fc5"
}
# fc6: 10 outputs, one per landmark coordinate (5 points * x, y).
layers {
  name: "fc6"
  type: INNER_PRODUCT
  bottom: "fc5"
  top: "fc6"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# Euclidean (L2) loss between the predicted coordinates and the labels.
layers {
  name: "loss"
  type: EUCLIDEAN_LOSS
  bottom: "fc6"
  bottom: "label"
  top: "loss"
}
-----deploy:
# Deploy (inference) definition: no data or loss layers, and no fillers or
# learning-rate settings.
name: "facialpoint"
# Input blob "data" with dimensions 1 x 1 x 39 x 39
# (batch size, channels, height, width).
input: "data"
input_dim: 1
input_dim: 1
input_dim: 39
input_dim: 39
# Shape notes below assume no padding - TODO confirm.
# conv1: 20 filters, 4x4, stride 1 (39x39 -> 36x36).
layers {
name: "conv1"
type: CONVOLUTION
bottom: "data"
top: "conv1"
convolution_param {
num_output: 20
kernel_size: 4
stride: 1
}
}
# In-place ReLU on conv1.
layers {
name: "relu1"
type: RELU
bottom: "conv1"
top: "conv1"
}
# pool1: 2x2 max pooling, stride 2 (36x36 -> 18x18).
layers {
name: "pool1"
type: POOLING
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
# conv2: 40 filters, 3x3, stride 1 (18x18 -> 16x16).
layers {
name: "conv2"
type: CONVOLUTION
bottom: "pool1"
top: "conv2"
convolution_param {
num_output: 40
kernel_size: 3
stride: 1
}
}
# In-place ReLU on conv2.
layers {
name: "relu2"
type: RELU
bottom: "conv2"
top: "conv2"
}
# pool2: 2x2 max pooling, stride 2 (16x16 -> 8x8).
layers {
name: "pool2"
type: POOLING
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
# conv3: 60 filters, 3x3, stride 1 (8x8 -> 6x6).
layers {
name: "conv3"
type: CONVOLUTION
bottom: "pool2"
top: "conv3"
convolution_param {
num_output: 60
kernel_size: 3
stride: 1
}
}
# In-place ReLU on conv3.
layers {
name: "relu3"
type: RELU
bottom: "conv3"
top: "conv3"
}
# pool3: 2x2 max pooling, stride 2 (6x6 -> 3x3).
layers {
name: "pool3"
type: POOLING
bottom: "conv3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
# conv4: 80 filters, 2x2, stride 1 (3x3 -> 2x2); no pooling follows.
layers {
name: "conv4"
type: CONVOLUTION
bottom: "pool3"
top: "conv4"
convolution_param {
num_output: 80
kernel_size: 2
stride: 1
}
}
# In-place ReLU on conv4.
layers {
name: "relu4"
type: RELU
bottom: "conv4"
top: "conv4"
}
# Regression head: fc5 (120 units) -> ReLU -> fc6 (10 outputs).
layers {
  name: "fc5"
  type: INNER_PRODUCT
  bottom: "conv4"
  top: "fc5"
  inner_product_param {
    num_output: 120
  }
}
# In-place ReLU on fc5.  It carries its own name ("relu5") so that layer names
# stay unique: "relu4" is already used by the ReLU that follows conv4.
layers {
  name: "relu5"
  type: RELU
  bottom: "fc5"
  top: "fc5"
}
# fc6: 10 outputs, one per landmark coordinate (5 points * x, y).
layers {
  name: "fc6"
  type: INNER_PRODUCT
  bottom: "fc5"
  top: "fc6"
  inner_product_param {
    num_output: 10
  }
}
检测代码:
import os
import matplotlib.pyplot as plt
import cv2
# Run the trained network on one face image and display the predicted points.
print(os.getcwd())
# caffe is imported only after switching to the caffe source tree.
os.chdir('/home/chenliangren/caffe-master')
print(os.getcwd())
import caffe

net = caffe.Classifier('aa.prototxt', 'aa2.caffemodel')
net.set_phase_test()
net.set_mode_gpu()

# Grayscale load (color=False) for the single-channel network input.
img = caffe.io.load_image('9.jpg', False)
# oversample=False: the default averages predictions over corner/centre crops
# and their mirrored copies, which is meaningless for coordinate regression
# (mirrored crops flip the x coordinates).
prediction = net.predict([img], oversample=False)
print('prediction shape: %s' % (prediction[0].shape,))
points = prediction[0]

# Colour copy of the same image, used only for display.
face = caffe.io.load_image('9.jpg', True)
face_h, face_w = face.shape[:2]

# Map the outputs from [-1, 1] back to pixel coordinates of the displayed
# image (even entries are x, odd entries are y).  Plain Python ints are used
# because cv2.circle needs integer pixel coordinates.
recoverfacepoint = [0] * len(points)
recoverfacepoint[0::2] = [int(float(p) * face_w / 2.0 + face_w / 2.0)
                          for p in points[0::2]]
recoverfacepoint[1::2] = [int(float(p) * face_h / 2.0 + face_h / 2.0)
                          for p in points[1::2]]
landmarks = list(zip(recoverfacepoint[0::2], recoverfacepoint[1::2]))

# Draw the points cumulatively; the window is refreshed after each of the
# first three points and once more after the last two have both been drawn.
SHOWN_WITH_NEXT = 3  # index of the point displayed together with its successor
for idx, center in enumerate(landmarks):
    cv2.circle(face, center, 1, (255, 0, 0), 1)
    if idx == SHOWN_WITH_NEXT:
        continue
    cv2.imshow('face', face)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
人脸关键点数据:
http://mmlab.ie.cuhk.edu.hk/archive/CNN_FacePoint.htm