深度学习模型通常采用python语言进行开发,而许多传统的应用程序采用C++开发。OpenCV从3.4版本开始提供了dnn深度学习模块,可以加载tensorflow、caffe等框架训练好的模型,因此采用C++开发的传统应用程序也可以利用这一接口实现深度学习功能。本文以人脸表情数据集为例,说明如何利用keras建立深度学习模型,再转换为tensorflow的pb模型,然后利用OpenCV dnn加载该模型,实现表情分类。
1. 利用keras建立深度学习模型
(1)import需要的库文件
import keras
import os,shutil
from keras import layers
from keras import models
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
# Bare expression: only displays the version in a notebook cell; in a plain
# script it has no visible effect (wrap in print() if needed).
keras.__version__
(2)根据数据集所在文件夹,读取数据模型,数据分为训练数据集train、验证数据集validation、测试数据集test
# Dataset layout on disk: <base>/{train,validation,test}/{smile,nosmile}
base_dir = 'E:/python learn/faceDetect/data'
train_dir, validation_dir, test_dir = (
    os.path.join(base_dir, split) for split in ('train', 'validation', 'test'))

train_smile_dir = os.path.join(train_dir, 'smile')
train_nosmile_dir = os.path.join(train_dir, 'nosmile')
validation_smile_dir = os.path.join(validation_dir, 'smile')
validation_nosmile_dir = os.path.join(validation_dir, 'nosmile')

# Report how many images each split/class directory contains.
print('total training smile images:', len(os.listdir(train_smile_dir)))
print('total training nosmile images:', len(os.listdir(train_nosmile_dir)))
print('total validation smile images:', len(os.listdir(validation_smile_dir)))
print('total validation nosmile images:', len(os.listdir(validation_nosmile_dir)))
此时程序输出:
total training smile images: 2162
total training nosmile images: 1837
total validation smile images: 2162
total validation nosmile images: 1837
(3)建立模型
# Small CNN: four conv/max-pool stages that shrink 150x150x3 down to a
# feature map, then a dense head with a single sigmoid unit for the
# binary smile / nosmile decision.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
# Binary cross-entropy matches the single sigmoid output.
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-4),
              metrics=['acc'])
model.summary()
(4)训练模型
# Pixel values rescaled to [0, 1]; no augmentation on either split.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Stream 150x150 images in batches of 20 with 0/1 labels
# (class_mode='binary' to match the binary_crossentropy loss).
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary')
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary')

# Sanity-check the shape of one batch before training.
data_batch, labels_batch = next(train_generator)
print('data batch shape:', data_batch.shape)
print('labels batch shape:', labels_batch.shape)

history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=30,
    validation_data=validation_generator,
    validation_steps=50)
(5)保存模型
# Persist the trained network (architecture + weights) as a Keras HDF5 file.
model.save('E:/python learn/faceDetect/smile_and_nosmile.h5')
2. 模型转换,将keras模型文件h5转换为pb模型
import keras
import tensorflow as tf
import os
# Adapted from a widely shared TF1 snippet for exporting Keras models.
def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    """
    Freezes the state of a session into a pruned computation graph.

    Creates a new computation graph where variable nodes are replaced by
    constants taking their current value in the session. The new graph will be
    pruned so subgraphs that are not necessary to compute the requested
    outputs are removed.
    @param session The TensorFlow session to be frozen.
    @param keep_var_names A list of variable names that should not be frozen,
                          or None to freeze all the variables in the graph.
    @param output_names Names of the relevant graph outputs.
    @param clear_devices Remove the device directives from the graph for better portability.
    @return The frozen graph definition.
    """
    graph = session.graph
    with graph.as_default():
        freeze_var_names = list(
            set(v.op.name for v in tf.global_variables()).difference(keep_var_names or []))
        # FIX: the original also appended every global variable to
        # output_names, which kept the whole graph alive and defeated the
        # pruning promised above. Only the caller-requested outputs are kept.
        output_names = list(output_names or [])
        input_graph_def = graph.as_graph_def()
        if clear_devices:
            # Strip device pinning so the frozen graph loads anywhere.
            for node in input_graph_def.node:
                node.device = ''
        frozen_graph = tf.graph_util.convert_variables_to_constants(
            session, input_graph_def, output_names, freeze_var_names)
        return frozen_graph
if __name__ == '__main__':
    input_path = './'
    # HDF5 file produced by the Keras training script.
    input_file = 'smile_and_nosmile.h5'
    weight_file_path = os.path.join(input_path, input_file)
    # FIX: derive the .pb name from the bare file name. The original sliced
    # the joined path (weight_file_path[:-3] + '.pb'), and since
    # tf.train.write_graph joins its logdir with the name again, any
    # input_path other than './' would have been double-prefixed.
    output_graph_name = os.path.splitext(input_file)[0] + '.pb'
    # Inference mode: disables dropout / sets batch-norm to test phase
    # before the graph is captured.
    keras.backend.set_learning_phase(0)
    h5_model = keras.models.load_model(weight_file_path)
    frozen_graph = freeze_session(
        keras.backend.get_session(),
        output_names=[out.op.name for out in h5_model.outputs])
    tf.train.write_graph(frozen_graph, input_path, output_graph_name, as_text=False)
    print('Finished')
# Smoke test: confirm OpenCV's dnn module can parse the frozen graph.
import cv2
model = cv2.dnn.readNetFromTensorflow("smile_and_nosmile.pb")
print('Load')
3. 预测表情:利用python直接加载keras的h5模型进行预测
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 19 10:39:05 2020
@author: Yonghong Li
"""
import cv2
from keras.preprocessing import image
from keras.models import load_model
import numpy as np
def get_face_data():
    """Webcam loop: detect faces with a Haar cascade and label each one
    'smile' / 'nosmile' using the trained Keras model.

    Runs until the camera stops delivering frames or 'q' is pressed.
    """
    # Haar cascade data file for frontal-face detection.
    face_cascade = cv2.CascadeClassifier(
        'C:/opencv/sources/data/haarcascades/haarcascade_frontalface_default.xml')
    model = load_model('smile_and_nosmile.h5')
    font = cv2.FONT_HERSHEY_SIMPLEX
    camera = cv2.VideoCapture(0)
    count = 1
    while True:
        ret, frame = camera.read()
        if not ret:
            # FIX: the original ignored ret; a missing frame (camera busy or
            # unplugged) would crash cvtColor on None. Exit cleanly instead.
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # detectMultiScale(image, scaleFactor=1.3, minNeighbors=5):
        # scaleFactor controls the image-pyramid step between detection
        # scales; minNeighbors is how many overlapping hits are required
        # before a candidate rectangle is accepted.
        faces = face_cascade.detectMultiScale(gray, 1.3, 5)
        for (x, y, w, h) in faces:
            # Mark the detected face on the displayed frame.
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
            # Crop the face and resize to the 150x150 input the model expects.
            f = cv2.resize(frame[y:y+h, x:x+w], (150, 150))
            # OpenCV delivers BGR; the model was trained on RGB in [0, 1].
            img1 = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
            img1 = np.array(img1) / 255.
            img_tensor = img1.reshape(-1, 150, 150, 3)
            prediction = model.predict(img_tensor)
            # Sigmoid score: >= 0.5 means 'smile'.
            result = 'nosmile' if prediction[0][0] < 0.5 else 'smile'
            cv2.putText(frame, result, (x, y), font, 2, (255, 0, 0), 2, cv2.LINE_AA)
            count += 1
        cv2.imshow('pic', frame)
        # Press 'q' to quit.
        if cv2.waitKey(120) & 0xff == ord('q'):
            break
    camera.release()
    cv2.destroyAllWindows()

if __name__ == '__main__':
    get_face_data()
4. 预测表情:利用OpenCV dnn加载pb模型实现预测
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 28 09:23:18 2020
@author: Yonghong Li
"""
import cv2
import numpy as np
#from cv2.dnn import dnn
def get_face_data():
    """Webcam loop: detect faces with a Haar cascade and label each one
    'smile' / 'nosmile' via OpenCV dnn running the frozen TensorFlow graph.

    Runs until the camera stops delivering frames or 'q' is pressed.
    """
    # Haar cascade data file for frontal-face detection.
    face_cascade = cv2.CascadeClassifier(
        'C:/opencv/sources/data/haarcascades/haarcascade_frontalface_default.xml')
    net = cv2.dnn.readNetFromTensorflow('smile_and_nosmile.pb')
    font = cv2.FONT_HERSHEY_SIMPLEX
    camera = cv2.VideoCapture(0)
    count = 1
    while True:
        ret, frame = camera.read()
        if not ret:
            # FIX: the original ignored ret; a missing frame (camera busy or
            # unplugged) would crash cvtColor on None. Exit cleanly instead.
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # detectMultiScale(image, scaleFactor=1.3, minNeighbors=5):
        # scaleFactor controls the image-pyramid step between detection
        # scales; minNeighbors is how many overlapping hits are required
        # before a candidate rectangle is accepted.
        faces = face_cascade.detectMultiScale(gray, 1.3, 5)
        for (x, y, w, h) in faces:
            # Mark the detected face on the displayed frame.
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
            # Crop the face and resize to the 150x150 input the model expects.
            f = cv2.resize(frame[y:y+h, x:x+w], (150, 150))
            # blobFromImage reproduces the training preprocessing: scale
            # pixels to [0, 1], resize to 150x150, swap BGR -> RGB.
            img_tensor = cv2.dnn.blobFromImage(
                f, 1 / 255.0, (150, 150), swapRB=True, crop=False)
            net.setInput(img_tensor)
            ln = net.getUnconnectedOutLayersNames()
            # forward(names) returns one array per requested output; here a
            # single (1, 1) sigmoid score.
            prediction = net.forward(ln)
            print(prediction)
            if prediction[0][0] < 0.5:
                result = 'nosmile'
            else:
                result = 'smile'
            cv2.putText(frame, result, (x, y), font, 2, (255, 0, 0), 2, cv2.LINE_AA)
            count += 1
        cv2.imshow('pic', frame)
        # Press 'q' to quit.
        if cv2.waitKey(120) & 0xff == ord('q'):
            break
    camera.release()
    cv2.destroyAllWindows()

if __name__ == '__main__':
    get_face_data()
完整代码及数据集访问:https://download.csdn.net/download/mr_liyonghong/12785376