第一步:通过 OpenCV 读取摄像头或本地存储的视频,检测并裁剪出人脸灰度图像,作为训练集、验证集和测试集。
保存格式如下:例如训练集 train_images 中有子目录 person0、person1、person2……,每个子目录中放置对应人物的全部训练图片,子目录名末尾的数字即该人物的类别标签(例如 train_images/person0/0.jpg);验证集 validation_images 和测试集 test_images 采用相同的目录结构。
实现代码(保存为 opencv_collecting_face.py,第三步会从该文件导入 resize_image):
import cv2
import os
import numpy as np
def create_dir(*args):
    """Create every directory path given, including missing parents.

    Args:
        *args: Directory paths to create. Paths that already exist as
            directories are left untouched.
    """
    for path in args:
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(path, exist_ok=True)
def get_padding_size(shape):
    """Return the border widths that pad an image of ``shape`` to a square.

    Args:
        shape: ``(height, width)`` of the image.

    Returns:
        list: ``[top, bottom, left, right]`` padding in pixels. The shorter
        side is padded up to the longer one; when the difference is odd the
        extra pixel goes to the bottom/right so the result is exactly square.
    """
    h, w = shape
    longest = max(h, w)
    pad_h = longest - h
    pad_w = longest - w
    top = pad_h // 2
    left = pad_w // 2
    # The second half is the remainder, not another floor division; otherwise
    # an odd difference would leave the padded image one pixel short.
    return [int(top), int(pad_h - top), int(left), int(pad_w - left)]
def resize_image(img, h=64, w=64):
    """Pad an image to a square with black borders, then resize it.

    Args:
        img: Image array whose first two dimensions are (height, width).
        h: Output height in pixels.
        w: Output width in pixels.

    Returns:
        The padded image resized to ``h`` rows by ``w`` columns.
    """
    # Pad the shorter side so that it matches the longer one.
    top, bottom, left, right = get_padding_size(img.shape[0:2])
    img = cv2.copyMakeBorder(img, top, bottom, left, right,
                             cv2.BORDER_CONSTANT, value=(0, 0, 0))
    # cv2.resize takes dsize as (width, height), not (height, width).
    return cv2.resize(img, (w, h))
def relight(imgsrc, alpha=1, bias=0):
    """Change image brightness/contrast as a simple data augmentation.

    Computes ``imgsrc * alpha + bias`` in floating point, clips the result to
    ``[0, 255]`` and converts it to ``uint8``. The input array is not modified.

    Args:
        imgsrc: Image array with pixel values in ``[0, 255]``.
        alpha: Multiplicative gain.
        bias: Additive offset.

    Returns:
        numpy.ndarray: New ``uint8`` array with the same shape as ``imgsrc``.
    """
    # astype() returns a new array; do the arithmetic on the float copy so
    # uint8 input cannot wrap around before clipping.
    adjusted = np.asarray(imgsrc).astype(np.float64) * alpha + bias
    return np.clip(adjusted, 0, 255).astype(np.uint8)
def detect_face(n, frame, haar, outdir):
    """Detect faces in a frame, save the face crop and mark it on the frame.

    Args:
        n: Index used as the output file name (``<n>.jpg``).
        frame: BGR image to search.
        haar: Cascade classifier providing ``detectMultiScale``.
        outdir: Directory the face image is written to.

    Returns:
        The frame with a text tag and a rectangle drawn for every face.
    """
    # Detection runs on a grayscale image for efficiency.
    img_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = haar.detectMultiScale(img_gray, 1.3, 5)
    for face_x, face_y, face_w, face_h in faces:
        # NumPy images are indexed [row, column], i.e. [y, x].
        face = img_gray[face_y:face_y + face_h, face_x:face_x + face_w]
        face = resize_image(face)
        # Random gain/offset so the saved samples vary in brightness.
        light_face = relight(face, np.random.uniform(0.5, 1.5),
                             np.random.randint(-50, 50))
        # NOTE: the file name depends only on n, so when several faces are
        # found in one frame only the last one is kept.
        cv2.imwrite(os.path.join(outdir, '{}.jpg'.format(n)), light_face)
        # Mark the face on the frame.
        cv2.putText(frame, "name", (face_x, face_y - 20),
                    cv2.FONT_HERSHEY_COMPLEX, 1, 255, 2)
        frame = cv2.rectangle(frame, (face_x, face_y),
                              (face_x + face_w, face_y + face_h),
                              (255, 0, 0), 2)
    print("frame", frame.shape)
    return frame
def get_face_from_camera(outdir):
    """Grab frames from camera 0 and save the detected faces to ``outdir``.

    Processes at most 140 frames with detect_face(), showing each annotated
    frame. Stops early when 'q' is pressed or a frame cannot be read.

    Args:
        outdir: Directory the face images are written to (created if needed).

    Raises:
        IOError: If the Haar cascade file cannot be loaded.
    """
    create_dir(outdir)
    # Haar-feature face detector.
    haar = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
    if haar.empty():
        raise IOError("cannot load haarcascade_frontalface_default.xml")
    cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
    try:
        size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        print(size)
        for i in range(140):
            print("It`s processing {} image".format(i))
            ret, frame = cap.read()
            if not ret:
                # No frame delivered; detect_face() cannot handle None.
                break
            frame1 = detect_face(i, frame, haar, outdir)
            cv2.imshow("frame", frame1)
            k = cv2.waitKey(20)
            if k & 0xff == ord('q'):
                break
    finally:
        # Release the camera and windows even if processing fails.
        cap.release()
        cv2.destroyAllWindows()
def get_face_from_local(local_path, outdir):
    """Read a local video and save the detected faces to ``outdir``.

    Every frame is cropped (top 300 rows removed when the frame is taller
    than that), resized to 400x400, passed to detect_face() and displayed.
    Stops at the end of the video or when 'q' is pressed.

    Args:
        local_path: Path of the video file.
        outdir: Directory the face images are written to (created if needed).

    Raises:
        IOError: If the Haar cascade file cannot be loaded.
    """
    create_dir(outdir)
    haar = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
    if haar.empty():
        raise IOError("cannot load haarcascade_frontalface_default.xml")
    cap = cv2.VideoCapture(local_path)
    try:
        i = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Large source frames are cropped and scaled down. Skip the crop
            # for short frames, where it would leave nothing to resize.
            if frame.shape[0] > 300:
                frame = frame[300:, :]
            frame = cv2.resize(frame, (400, 400))
            frame = detect_face(i, frame, haar, outdir)
            i += 1
            cv2.imshow("frame", frame)
            k = cv2.waitKey(20)
            if k & 0xff == ord('q'):
                break
    finally:
        # Release the video and windows even if processing fails.
        cap.release()
        cv2.destroyAllWindows()
if __name__ == "__main__":
    # Collect face images from the camera into the first person's folder.
    face_output_dir = "train_images/person0"
    get_face_from_camera(face_output_dir)
第二步:训练CNN网络,并保存精度最高的一代(epoch)模型。
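CNN网络:3层卷积-池化,2层全连接,最后softmax输出(softmax 在损失函数 softmax_cross_entropy_with_logits 中计算)。代码保存为 cnn_train.py,第三步会从该文件导入 read_image、one_hot 和 inference。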
import os
import tensorflow as tf
import numpy as np
import PIL
def conv2d(input, weight_shape, bias_shape):
    """Stride-1, SAME-padded convolution followed by bias add and ReLU.

    Args:
        input: Tensor passed to ``tf.nn.conv2d``.
        weight_shape: Shape of the kernel variable ``w``; the product of its
            first three entries sets the initializer's standard deviation.
        bias_shape: Shape of the bias variable ``b``.

    Returns:
        ``relu(conv2d(input, w) + b)``.
    """
    fan_in = weight_shape[0] * weight_shape[1] * weight_shape[2]
    kernel = tf.get_variable(
        name='w',
        shape=weight_shape,
        initializer=tf.random_normal_initializer(stddev=(2.0 / fan_in) ** 0.5),
    )
    offset = tf.get_variable(
        name='b',
        shape=bias_shape,
        initializer=tf.constant_initializer(value=0),
    )
    pre_activation = tf.nn.bias_add(
        tf.nn.conv2d(input, kernel, strides=[1, 1, 1, 1], padding="SAME"),
        offset,
    )
    return tf.nn.relu(pre_activation)
def max_pool(input, k=2):
    """Max-pool with a k-by-k window and stride k, using SAME padding."""
    window = [1, k, k, 1]
    step = [1, k, k, 1]
    pooled = tf.nn.max_pool(input, ksize=window, strides=step, padding="SAME")
    return pooled
def layer(input, weight_shape, bias_shape):
    """Fully connected layer with ReLU: ``relu(matmul(input, w) + b)``.

    Args:
        input: Tensor multiplied by the weight matrix.
        weight_shape: Shape of the weight variable ``w``; its first entry
            sets the initializer's standard deviation.
        bias_shape: Shape of the bias variable ``b``.

    Returns:
        The activated layer output.
    """
    spread = (2.0 / weight_shape[0]) ** 0.5
    weights = tf.get_variable(
        'w',
        shape=weight_shape,
        initializer=tf.random_normal_initializer(stddev=spread),
    )
    biases = tf.get_variable(
        'b',
        shape=bias_shape,
        initializer=tf.constant_initializer(value=0),
    )
    return tf.nn.relu(tf.matmul(input, weights) + biases)
def inference(x, keep_prob=0.5, num_classes=2):
    """Build the CNN for 64x64 grayscale faces: 3 conv/pool stages, 2 dense.

    Args:
        x: Input tensor; reshaped to ``[-1, 64, 64, 1]``.
        keep_prob: Keep probability of the dropout after the first dense
            layer. The default matches the training setup; pass ``1.0`` when
            evaluating or predicting so dropout does not perturb the output.
        num_classes: Number of output classes.

    Returns:
        Tensor of shape ``[-1, num_classes]`` with the class scores.
    """
    x = tf.reshape(x, shape=[-1, 64, 64, 1])
    with tf.variable_scope("conv_1"):
        conv_1 = conv2d(x, [5, 5, 1, 32], [32])
        pool_1 = max_pool(conv_1)  # [-1, 32, 32, 32]
    with tf.variable_scope("conv_2"):
        conv_2 = conv2d(pool_1, [3, 3, 32, 64], [64])
        pool_2 = max_pool(conv_2)  # [-1, 16, 16, 64]
    with tf.variable_scope("conv_3"):
        conv_3 = conv2d(pool_2, [3, 3, 64, 64], [64])
        pool_3 = max_pool(conv_3)  # [-1, 8, 8, 64]
    with tf.variable_scope("fc_1"):
        flat = tf.reshape(pool_3, [-1, 8 * 8 * 64])
        fc_1 = layer(flat, [8 * 8 * 64, 64], [64])
        fc_1_drop = tf.nn.dropout(fc_1, keep_prob=keep_prob)
    with tf.variable_scope("output"):
        # NOTE(review): layer() applies ReLU, so these scores are clipped at
        # zero before the softmax in loss(); confirm this is intended.
        output = layer(fc_1_drop, [64, num_classes], [num_classes])
    return output
def loss(output, y):
    """Return the mean softmax cross-entropy between ``output`` and ``y``.

    Args:
        output: Logits produced by the network.
        y: Label tensor with the same shape as ``output``.
    """
    per_example = tf.nn.softmax_cross_entropy_with_logits(
        logits=output, labels=y)
    return tf.reduce_mean(per_example)
def train(cost, global_step, learning_rate):
    """Register a scalar summary for the cost and build the training op.

    Args:
        cost: Scalar tensor to minimize.
        global_step: Variable incremented on every optimizer step.
        learning_rate: Learning rate of the optimizer.

    Returns:
        The op that performs one momentum-optimizer update (momentum 0.9).
    """
    tf.summary.scalar("cost", cost)
    # Gradient descent with momentum.
    optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
    return optimizer.minimize(cost, global_step=global_step)
def evaluate(output, y):
    """Return the fraction of rows whose arg-max matches between both inputs.

    Args:
        output: Network output, one row per sample.
        y: Label tensor, one row per sample.
    """
    predicted_class = tf.argmax(output, axis=1)
    expected_class = tf.argmax(y, axis=1)
    hits = tf.cast(tf.equal(predicted_class, expected_class), tf.float32)
    return tf.reduce_mean(hits)
def read_image(images_dir):
    """Load every image below ``images_dir`` together with its class label.

    Expected layout: one sub-directory per person whose name ends with the
    integer class label, e.g. ``train_images/person0``,
    ``train_images/person1``. All images must have the same size so they can
    be stacked into one array.

    Args:
        images_dir: Root directory containing the per-person sub-directories.

    Returns:
        tuple: ``(fpaths, data, labels)`` where ``fpaths`` is the list of
        image paths, ``data`` the array of images scaled to ``[0, 1]`` and
        ``labels`` the array of integer class labels.

    Raises:
        ValueError: If a sub-directory name does not end with digits.
    """
    import re

    # ``import PIL`` alone does not load the Image submodule.
    from PIL import Image

    data = []
    labels = []
    fpaths = []
    # One folder per person; sorted so the result order is deterministic.
    for person_dir in sorted(os.listdir(images_dir)):
        person_path = os.path.join(images_dir, person_dir)
        if not os.path.isdir(person_path):
            # Ignore stray files next to the person folders.
            continue
        # Use all trailing digits so labels above 9 are parsed correctly.
        match = re.search(r"\d+$", person_dir)
        if match is None:
            raise ValueError(
                "directory name must end with the class label: "
                "{!r}".format(person_dir))
        label = int(match.group())
        for file_name in sorted(os.listdir(person_path)):
            fpath = os.path.join(person_path, file_name)
            with Image.open(fpath) as image:
                pixels = np.array(image) / 255.0  # normalize to [0, 1]
            fpaths.append(fpath)
            data.append(pixels)
            labels.append(label)
    return fpaths, np.array(data), np.array(labels)
def one_hot(labels, Label_class):
    """One-hot encode integer class labels.

    Args:
        labels: Sequence of class labels (anything convertible to int).
        Label_class: Number of classes, i.e. the width of each encoded row.

    Returns:
        numpy.ndarray: Integer array of shape ``(len(labels), Label_class)``
        with a 1 in the column of each label, e.g. label 1 of 5 classes
        becomes ``01000``. Labels outside ``range(Label_class)`` give an
        all-zero row. Empty input gives an array of shape
        ``(0, Label_class)``.
    """
    class_ids = np.asarray(labels).astype(int).reshape(-1)
    # Compare each label against every class index in one vectorized step.
    return (class_ids[:, None] == np.arange(Label_class)).astype(int)
def local_image_train():
    """Train the face-recognition CNN on local images and keep the best model.

    Reads ``train_images`` and ``validation_images``, trains for 50 epochs
    with mini-batches of 32, reports the validation error after every epoch
    and saves a checkpoint to ``cnn_model/`` whenever validation accuracy
    improves.

    Raises:
        ValueError: If no training images are found.
    """
    learning_rate = 0.01
    training_epochs = 50
    display_epoch = 1
    batch_size = 32

    # Training images and labels.
    _, train_images, train_labels = read_image("train_images")
    if len(train_images) == 0:
        raise ValueError("no training images found in train_images")
    train_labels = one_hot(train_labels, 2)
    # Shuffle the training samples.
    order = np.random.permutation(len(train_images))
    train_images = train_images[order]
    train_labels = train_labels[order]
    print("shape of data:{}, shape of labels:{}".format(
        train_images.shape, train_labels.shape))

    # Validation images and labels.
    _, validation_images, validation_labels = read_image("validation_images")
    validation_labels = one_hot(validation_labels, 2)

    # Make sure the checkpoint directory exists before the first save.
    os.makedirs("cnn_model", exist_ok=True)

    with tf.Session() as sess:
        x = tf.placeholder(tf.float32, [None, 64, 64])
        y = tf.placeholder(tf.float32, [None, 2])
        output = inference(x)
        cost = loss(output, y)
        global_step = tf.Variable(0, trainable=False, name="global_step")
        train_op = train(cost, global_step, learning_rate)
        # NOTE(review): inference() applies dropout unconditionally, so the
        # validation accuracy below is measured with dropout active.
        eval_op = evaluate(output, y)

        # Write the graph and the cost summary.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter("summary_graph/",
                                               graph=sess.graph)
        # Only the most accurate epoch is kept on disk.
        saver = tf.train.Saver(max_to_keep=1)
        sess.run(tf.global_variables_initializer())

        max_acc = 0
        # Includes the final partial batch so no sample is dropped.
        batch_starts = range(0, len(train_images), batch_size)
        for step in range(training_epochs):
            avg_cost = 0.0  # reset so the value is a per-epoch average
            for start in batch_starts:
                stop = start + batch_size
                feed_dict = {x: train_images[start:stop],
                             y: train_labels[start:stop]}
                # One run gives both the update and the batch cost.
                _, mini_cost = sess.run([train_op, cost], feed_dict=feed_dict)
                avg_cost += mini_cost / len(batch_starts)
            if step % display_epoch == 0:
                # Validate the model.
                accuracy = sess.run(eval_op,
                                    feed_dict={x: validation_images,
                                               y: validation_labels})
                print("epoch {}: average training cost {}".format(
                    step, avg_cost))
                print("validation error is {}".format(1-accuracy))
                # The summary is evaluated on the last training batch.
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, sess.run(global_step))
                # Save only when this epoch beats the best so far.
                if accuracy > max_acc:
                    max_acc = accuracy
                    saver.save(sess, "cnn_model/network.ckpt",
                               global_step=global_step)
        summary_writer.close()
        print("Optimization Finished!")
if __name__ == "__main__":
    # Entry point: fit the face-recognition model on the local image folders.
    local_image_train()
第三步:恢复训练好的模型,用测试集测试模型精度;或者调用摄像头实现人脸识别,当图像库中的人脸出现在画面中时,标记出人物姓名。
import tensorflow as tf
import cv2
from cnn_train import read_image, one_hot, inference
from opencv_collecting_face import resize_image
import numpy as np
def cnn_test():
    """Restore the latest checkpoint and print accuracy on ``test_images``.

    Prints the per-sample boolean array of correct predictions followed by
    the overall accuracy.

    Raises:
        IOError: If ``cnn_model/`` contains no checkpoint.
    """
    _, test_images, test_labels = read_image("test_images")
    test_labels = one_hot(test_labels, 2)
    x = tf.placeholder("float", shape=[None, 64, 64])
    # NOTE(review): inference() applies dropout unconditionally, so these
    # predictions are computed with dropout active.
    output = inference(x)
    checkpoint = tf.train.latest_checkpoint("cnn_model/")
    if checkpoint is None:
        raise IOError("no checkpoint found in cnn_model/")
    with tf.Session() as sess:
        saver = tf.train.Saver()
        saver.restore(sess, checkpoint)
        predict = sess.run(output, feed_dict={x: test_images})
    # The results are plain arrays, so score them with NumPy instead of
    # adding new ops to the graph.
    correct_predict = np.argmax(predict, 1) == np.argmax(test_labels, 1)
    print(correct_predict)
    accuracy = np.mean(correct_predict.astype(np.float32))
    print(accuracy)
def face_recognition():
    """Recognize faces from camera 0 with the trained CNN and tag them.

    Restores the latest checkpoint from ``cnn_model/``, classifies every
    detected face and draws the predicted name and a rectangle on the frame.
    Runs until 'q' is pressed or no frame can be read.

    Raises:
        IOError: If the Haar cascade or the checkpoint cannot be loaded.
    """
    # Maps the predicted class index to a person name.
    label_name = ['person0', 'person1', 'person2']
    haar = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
    if haar.empty():
        raise IOError("cannot load haarcascade_frontalface_default.xml")
    x = tf.placeholder(tf.float32, shape=[64, 64])
    # NOTE(review): inference() applies dropout unconditionally, so these
    # predictions are computed with dropout active.
    output = inference(x)
    checkpoint = tf.train.latest_checkpoint("cnn_model/")
    if checkpoint is None:
        raise IOError("no checkpoint found in cnn_model/")
    cap = cv2.VideoCapture(0)
    try:
        with tf.Session() as sess:
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint)
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    # No frame delivered; stop instead of crashing on None.
                    break
                gray_image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                faces = haar.detectMultiScale(gray_image, 1.3, 5)
                for face_x, face_y, face_w, face_h in faces:
                    # NumPy images are indexed [row, column], i.e. [y, x].
                    face = gray_image[face_y:face_y + face_h,
                                      face_x:face_x + face_w]
                    # Same preprocessing as the training images: pad and
                    # resize to 64x64, then scale to [0, 1].
                    face = resize_image(face)
                    face = face / 255.0
                    # Class scores, shape (1, number of classes).
                    predict = sess.run(output, feed_dict={x: face})
                    # Take the arg-max with NumPy; creating a tf.argmax op
                    # per face would grow the graph on every frame.
                    label = int(np.argmax(predict, 1)[0])
                    # Draw the predicted name and the face rectangle.
                    cv2.putText(frame, label_name[label],
                                (face_x, face_y - 20),
                                cv2.FONT_HERSHEY_COMPLEX, 1, 255, 2)
                    frame = cv2.rectangle(frame, (face_x, face_y),
                                          (face_x + face_w, face_y + face_h),
                                          (255, 0, 0), 2)
                cv2.imshow("face recognition", frame)
                if cv2.waitKey(20) & 0xff == ord('q'):
                    break
    finally:
        # Release the camera and windows even if recognition fails.
        cap.release()
        cv2.destroyAllWindows()
if __name__ == "__main__":
    # Live camera recognition runs by default; call cnn_test() here instead
    # to score the restored model on the test set.
    face_recognition()
若是用mnist数据集训练手写体识别,可以在第二步的程序中做以下修改:
# 将第2步程序中的inference函数修改为以下代码,增加mnist_test()函数,其余不变
def inference(x, keep_prob=0.5, num_classes=10):
    """Build the CNN for 28x28 MNIST digits: 3 conv/pool stages, 2 dense.

    Args:
        x: Input tensor; reshaped to ``[-1, 28, 28, 1]``.
        keep_prob: Keep probability of the dropout after the first dense
            layer. The default matches the training setup; pass ``1.0`` when
            evaluating or predicting so dropout does not perturb the output.
        num_classes: Number of output classes.

    Returns:
        Tensor of shape ``[-1, num_classes]`` with the class scores.
    """
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    with tf.variable_scope("conv_1"):
        conv_1 = conv2d(x, [5, 5, 1, 32], [32])
        pool_1 = max_pool(conv_1)  # [-1, 14, 14, 32]
    with tf.variable_scope("conv_2"):
        conv_2 = conv2d(pool_1, [3, 3, 32, 64], [64])
        pool_2 = max_pool(conv_2)  # [-1, 7, 7, 64]
    with tf.variable_scope("conv_3"):
        conv_3 = conv2d(pool_2, [3, 3, 64, 64], [64])
        pool_3 = max_pool(conv_3)  # [-1, 4, 4, 64]; SAME padding rounds 7/2 up
    with tf.variable_scope("fc_1"):
        flat = tf.reshape(pool_3, [-1, 4 * 4 * 64])
        fc_1 = layer(flat, [4 * 4 * 64, 64], [64])
        fc_1_drop = tf.nn.dropout(fc_1, keep_prob=keep_prob)
    with tf.variable_scope("output"):
        # NOTE(review): layer() applies ReLU, so these scores are clipped at
        # zero before the softmax in loss(); confirm this is intended.
        output = layer(fc_1_drop, [64, num_classes], [num_classes])
    return output
def mnist_test():
    """Train the CNN on a few MNIST batches and print the validation error.

    Runs 10 epochs of 5 mini-batches (256 images each), printing the average
    training cost of every epoch and the validation error.
    """
    from tensorflow.examples.tutorials.mnist import input_data

    learning_rate = 0.01
    training_epochs = 10
    display_epoch = 1
    batch_size = 256
    # Training is slow, so only a few batches are used per epoch.
    batches_per_epoch = 5

    with tf.Graph().as_default():
        mnist = input_data.read_data_sets("MNIST_data/data/", one_hot=True)
        x = tf.placeholder('float', shape=[None, 28*28])
        y = tf.placeholder('float', shape=[None, 10])
        output = inference(x)
        cost = loss(output, y)
        global_step = tf.Variable(0, trainable=False, name="global_step")
        train_op = train(cost, global_step=global_step,
                         learning_rate=learning_rate)
        eval_op = evaluate(output, y)
        # The context manager closes the session when training is done.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for step in range(training_epochs):
                avg_cost = 0.0  # reset so the value is a per-epoch average
                for _ in range(batches_per_epoch):
                    # next_batch() takes the number of samples per batch.
                    mbatch_x, mbatch_y = mnist.train.next_batch(batch_size)
                    feed_dict = {x: mbatch_x, y: mbatch_y}
                    # One run gives both the update and the batch cost.
                    _, mini_cost = sess.run([train_op, cost],
                                            feed_dict=feed_dict)
                    # Average over the batches that were actually run.
                    avg_cost += mini_cost / batches_per_epoch
                print(avg_cost)
                if step % display_epoch == 0:
                    val_feed_dict = {x: mnist.validation.images,
                                     y: mnist.validation.labels}
                    accuracy = sess.run(eval_op, feed_dict=val_feed_dict)
                    print("validation error:", 1-accuracy)
            print("optimization finished!")
if __name__ == "__main__":
    # Entry point: train the handwritten-digit model on the MNIST dataset.
    mnist_test()
实现效果:请自己尝试!