A while ago I worked on a face recognition project. Early on I ran into a problem: when a target face is detected, only English text can be drawn inside the detection rectangle, and switching the label to Chinese produces garbled characters. In many scenarios this seriously hurts the user experience, because a detected face cannot be labeled with its Chinese name.
Here is the complete code of the original main file:
import cv2
import os
import numpy as np
from net.mtcnn import mtcnn
import utils.utils as utils
from net.inception import InceptionResNetV1

class face_rec():
    def __init__(self):
        # Create the mtcnn object used to detect faces in images
        self.mtcnn_model = mtcnn()
        # Detection thresholds
        self.threshold = [0.5, 0.8, 0.9]
        # Load facenet, which turns a detected face into a 128-dimensional vector
        self.facenet_model = InceptionResNetV1()
        # model.summary()
        model_path = './model_data/facenet_keras.h5'
        self.facenet_model.load_weights(model_path)
        # -----------------------------------------------#
        #   Encode the faces in the database
        #   known_face_encodings stores the encoded faces
        #   known_face_names stores the corresponding names
        # -----------------------------------------------#
        face_list = os.listdir("face_dataset")
        self.known_face_encodings = []
        self.known_face_names = []
        for face in face_list:
            name = face.split(".")[0]
            img = cv2.imread("./face_dataset/" + face)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # Detect the face
            rectangles = self.mtcnn_model.detectFace(img, self.threshold)
            # Turn the boxes into squares
            rectangles = utils.rect2square(np.array(rectangles))
            # facenet expects a 160x160 input image
            rectangle = rectangles[0]
            # Record the landmarks
            landmark = (np.reshape(rectangle[5:15], (5, 2)) - np.array([int(rectangle[0]), int(rectangle[1])])) / (rectangle[3] - rectangle[1]) * 160
            crop_img = img[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
            crop_img = cv2.resize(crop_img, (160, 160))
            new_img, _ = utils.Alignment_1(crop_img, landmark)
            new_img = np.expand_dims(new_img, 0)
            # Feed the detected face into facenet to extract its 128-dimensional feature vector
            face_encoding = utils.calc_128_vec(self.facenet_model, new_img)
            self.known_face_encodings.append(face_encoding)
            self.known_face_names.append(name)

    def recognize(self, draw):
        # -----------------------------------------------#
        #   Face recognition:
        #   locate the faces first, then match them against the database
        # -----------------------------------------------#
        height, width, _ = np.shape(draw)
        draw_rgb = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)
        # Detect faces
        rectangles = self.mtcnn_model.detectFace(draw_rgb, self.threshold)
        print(np.shape(rectangles))
        if len(rectangles) == 0:
            return
        # Turn the boxes into squares
        rectangles = utils.rect2square(np.array(rectangles, dtype=np.int32))
        rectangles[:, 0] = np.clip(rectangles[:, 0], 0, width)
        rectangles[:, 1] = np.clip(rectangles[:, 1], 0, height)
        rectangles[:, 2] = np.clip(rectangles[:, 2], 0, width)
        rectangles[:, 3] = np.clip(rectangles[:, 3], 0, height)
        # -----------------------------------------------#
        #   Encode the detected faces
        # -----------------------------------------------#
        face_encodings = []
        for rectangle in rectangles:
            # Landmark coordinates relative to the cropped face
            landmark = (np.reshape(rectangle[5:15], (5, 2)) - np.array([int(rectangle[0]), int(rectangle[1])])) / (rectangle[3] - rectangle[1]) * 160
            # Crop the face region
            crop_img = draw_rgb[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
            crop_img = cv2.resize(crop_img, (160, 160))
            # Align the face
            new_img, _ = utils.Alignment_1(crop_img, landmark)
            new_img = np.expand_dims(new_img, 0)
            # Compute the 128-dimensional feature vector with facenet_model
            face_encoding = utils.calc_128_vec(self.facenet_model, new_img)
            face_encodings.append(face_encoding)
        face_names = []
        for face_encoding in face_encodings:
            # Compare this face against every face in the database and compute the scores
            matches = utils.compare_faces(self.known_face_encodings, face_encoding, tolerance=0.9)
            name = "Unknown"
            # Find the closest face
            face_distances = utils.face_distance(self.known_face_encodings, face_encoding)
            # Take the index of that closest face
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = self.known_face_names[best_match_index]
            face_names.append(name)
        rectangles = rectangles[:, 0:4]
        # -----------------------------------------------#
        #   Draw the boxes
        # -----------------------------------------------#
        for (left, top, right, bottom), name in zip(rectangles, face_names):
            cv2.rectangle(draw, (left, top), (right, bottom), (0, 0, 255), 2)
            font = cv2.FONT_HERSHEY_SIMPLEX
            cv2.putText(draw, name, (left, bottom - 15), font, 0.75, (255, 255, 255), 2)
        return draw

if __name__ == "__main__":
    dududu = face_rec()
    video_capture = cv2.VideoCapture(1)
    while True:
        ret, draw = video_capture.read()
        dududu.recognize(draw)
        cv2.imshow('Video', draw)
        if cv2.waitKey(20) & 0xFF == ord('q'):
            break
    video_capture.release()
    cv2.destroyAllWindows()

To fix these problems, I modified the main file as follows (the changes are explained below):

import cv2
import os
import numpy as np
from net.mtcnn import mtcnn
import utils.utils as utils
from net.inception import InceptionResNetV1
from PIL import Image, ImageDraw, ImageFont

def change_cv2_draw(image, name, local, sizes, colour):
    if isinstance(image, np.ndarray):  # make sure this is an OpenCV image (numpy array)
        pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        draw1 = ImageDraw.Draw(pil_img)  # draw the text on the PIL image
        font = ImageFont.truetype("SIMLI.TTF", sizes, encoding="utf-8")
        draw1.text(local, name, colour, font=font)
        # Convert the PIL image back to OpenCV format
        image = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
    return image

class face_rec():
    def __init__(self):
        # Create the mtcnn object used to detect faces in images
        self.mtcnn_model = mtcnn()
        # Detection thresholds
        self.threshold = [0.5, 0.8, 0.9]
        # Load facenet, which turns a detected face into a 128-dimensional vector
        self.facenet_model = InceptionResNetV1()
        # model.summary()
        model_path = './model_data/facenet_keras.h5'
        self.facenet_model.load_weights(model_path)
        # -----------------------------------------------#
        #   Encode the faces in the database
        #   known_face_encodings stores the encoded faces
        #   known_face_names stores the corresponding names
        # -----------------------------------------------#
        face_list = os.listdir("face_dataset")
        self.known_face_encodings = []
        self.known_face_names = []
        for face in face_list:
            name = face.split(".")[0]
            # img = cv2.imread("./face_dataset/" + face)
            # img = cv2.imdecode(np.fromfile("./face_dataset/" + face, dtype=np.uint8), -1)
            img = cv2.imdecode(np.fromfile("./face_dataset/" + face, dtype=np.uint8), cv2.IMREAD_COLOR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # Detect the face
            rectangles = self.mtcnn_model.detectFace(img, self.threshold)
            # Turn the boxes into squares
            rectangles = utils.rect2square(np.array(rectangles))
            # facenet expects a 160x160 input image
            rectangle = rectangles[0]
            # Record the landmarks
            landmark = (np.reshape(rectangle[5:15], (5, 2)) - np.array([int(rectangle[0]), int(rectangle[1])])) / (rectangle[3] - rectangle[1]) * 160
            crop_img = img[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
            crop_img = cv2.resize(crop_img, (160, 160))
            new_img, _ = utils.Alignment_1(crop_img, landmark)
            new_img = np.expand_dims(new_img, 0)
            # Feed the detected face into facenet to extract its 128-dimensional feature vector
            face_encoding = utils.calc_128_vec(self.facenet_model, new_img)
            self.known_face_encodings.append(face_encoding)
            self.known_face_names.append(name)

    def recognize(self, draw):
        # -----------------------------------------------#
        #   Face recognition:
        #   locate the faces first, then match them against the database
        # -----------------------------------------------#
        height, width, _ = np.shape(draw)
        draw_rgb = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)
        # Detect faces
        rectangles = self.mtcnn_model.detectFace(draw_rgb, self.threshold)
        if len(rectangles) == 0:
            return
        # Turn the boxes into squares
        rectangles = utils.rect2square(np.array(rectangles, dtype=np.int32))
        rectangles[:, 0] = np.clip(rectangles[:, 0], 0, width)
        rectangles[:, 1] = np.clip(rectangles[:, 1], 0, height)
        rectangles[:, 2] = np.clip(rectangles[:, 2], 0, width)
        rectangles[:, 3] = np.clip(rectangles[:, 3], 0, height)
        # -----------------------------------------------#
        #   Encode the detected faces
        # -----------------------------------------------#
        face_encodings = []
        for rectangle in rectangles:
            landmark = (np.reshape(rectangle[5:15], (5, 2)) - np.array([int(rectangle[0]), int(rectangle[1])])) / (rectangle[3] - rectangle[1]) * 160
            crop_img = draw_rgb[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
            crop_img = cv2.resize(crop_img, (160, 160))
            new_img, _ = utils.Alignment_1(crop_img, landmark)
            new_img = np.expand_dims(new_img, 0)
            face_encoding = utils.calc_128_vec(self.facenet_model, new_img)
            face_encodings.append(face_encoding)
        face_names = []
        for face_encoding in face_encodings:
            # Compare this face against every face in the database and compute the scores
            matches = utils.compare_faces(self.known_face_encodings, face_encoding, tolerance=0.9)
            name = "我不认识你"  # Chinese for "I don't recognize you"
            # Find the closest face
            face_distances = utils.face_distance(self.known_face_encodings, face_encoding)
            # Take the index of that closest face
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = self.known_face_names[best_match_index]
            face_names.append(name)
        rectangles = rectangles[:, 0:4]
        # -----------------------------------------------#
        #   Draw the boxes
        # -----------------------------------------------#
        for (left, top, right, bottom), name in zip(rectangles, face_names):
            cv2.rectangle(draw, (left, top), (right, bottom), (0, 0, 255), 2)
            # font = ImageFont.truetype('simhei.ttf', 10, encoding="utf-8")
            # font = cv2.FONT_HERSHEY_SIMPLEX
            draw = change_cv2_draw(draw, name, (left, bottom - 30), 25, (255, 0, 0))
            # cv2.putText(draw, name, (left, bottom - 15), font, 0.75, (255, 255, 255), 2)
        cv2.imshow("video", draw)
        return draw

if __name__ == "__main__":
    dududu = face_rec()
    video_capture = cv2.VideoCapture(0)
    while True:
        ret, draw = video_capture.read()
        dududu.recognize(draw)
        # cv2.imshow('Video', draw)
        if cv2.waitKey(20) & 0xFF == ord('q'):
            break
    video_capture.release()
    cv2.destroyAllWindows()
1 Reading images when the path contains Chinese characters

Many of the image files in my face database are named in Chinese; they are photos of some classmates' faces, hehe. Reading them then raised an error, because cv2.imread cannot handle a path that contains Chinese characters.

I replaced the original line:

img = cv2.imread("./face_dataset/"+face)

which in OpenCV only works for ASCII (English) paths, with:

img = cv2.imdecode(np.fromfile("./face_dataset/"+face, dtype=np.uint8), cv2.IMREAD_COLOR)

This also works:

img = cv2.imdecode(np.fromfile("./face_dataset/"+face, dtype=np.uint8), -1)

There are several ways to do it, so pick whichever you prefer. After this change the code can read images whose paths contain Chinese characters, and it still works just as well when every path is in English.
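The same idea also works in the other direction: cv2.imwrite runs into the same limitation with non-ASCII paths, so if you ever need to save an image to a path containing Chinese characters, you can go through cv2.imencode and numpy instead. A minimal sketch (the file names here are only examples):

# Read: let numpy load the raw bytes, then decode them with OpenCV
img = cv2.imdecode(np.fromfile("./face_dataset/张三.jpg", dtype=np.uint8), cv2.IMREAD_COLOR)
# Write: encode to an in-memory buffer, then let numpy write the bytes to disk
ok, buf = cv2.imencode(".jpg", img)
if ok:
    buf.tofile("./face_dataset/张三_副本.jpg")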
2 Converting between OpenCV and PIL

The main idea: first convert the cv2 (OpenCV) image into a PIL image; because PIL can draw Chinese text, write the Chinese name onto the PIL image, then convert the PIL image back to cv2 format.

First define a conversion function:
def change_cv2_draw(image, name, local, sizes, colour):
    if isinstance(image, np.ndarray):  # make sure this is an OpenCV image (numpy array)
        pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        draw1 = ImageDraw.Draw(pil_img)  # draw the text on the PIL image
        font = ImageFont.truetype("SIMLI.TTF", sizes, encoding="utf-8")
        draw1.text(local, name, colour, font=font)
        # Convert the PIL image back to OpenCV format
        image = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
    return image
Call the function like this:

draw = change_cv2_draw(draw, name, (left, bottom - 30), 25, (255, 0, 0))
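Note that change_cv2_draw loads the font file SIMLI.TTF from the working directory, so that font (or any other TrueType font that covers Chinese, e.g. simhei.ttf) has to be available, otherwise ImageFont.truetype will raise an error. A quick standalone test of the function could look like this (test.jpg is just a placeholder name for any image on disk):

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

img = cv2.imread("test.jpg")                                   # any BGR test image
img = change_cv2_draw(img, "张三", (20, 20), 25, (255, 0, 0))   # text, position, font size, RGB colour
cv2.imshow("demo", img)
cv2.waitKey(0)
cv2.destroyAllWindows()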
Careful readers may have noticed this part:
for (left, top, right, bottom), name in zip(rectangles, face_names):
    cv2.rectangle(draw, (left, top), (right, bottom), (0, 0, 255), 2)
    # font = ImageFont.truetype('simhei.ttf', 10, encoding="utf-8")
    # font = cv2.FONT_HERSHEY_SIMPLEX
    draw = change_cv2_draw(draw, name, (left, bottom - 30), 25, (255, 0, 0))
    # cv2.putText(draw, name, (left, bottom - 15), font, 0.75, (255, 255, 255), 2)
cv2.imshow("video", draw)
return draw

if __name__ == "__main__":
    dududu = face_rec()
    video_capture = cv2.VideoCapture(0)
    while True:
        ret, draw = video_capture.read()
        dududu.recognize(draw)
        # cv2.imshow('Video', draw)
        if cv2.waitKey(20) & 0xFF == ord('q'):
            break
    video_capture.release()
    cv2.destroyAllWindows()
I moved cv2.imshow("video", draw) out of the main function at the bottom and into recognize(), because as long as that call stayed in main, no text appeared on the boxes (no garbled text either): change_cv2_draw returns a new image object, so the labeled frame exists only inside recognize(), while the draw variable that main was displaying is still the unlabeled camera frame. Beyond that, the parameters I set in the code are purely personal preference; the colour, size, and typeface of the label inside the detection box can all be adjusted to your liking, and as long as the text stays within bounds it is fun to play with.
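If you would rather keep the display in the main loop, a possible alternative (just a sketch, not what the code above does) is to remove the cv2.imshow call from recognize() and have main use its return value instead. Because recognize() returns None when no face is detected, the loop falls back to the raw frame in that case:

if __name__ == "__main__":
    dududu = face_rec()
    video_capture = cv2.VideoCapture(0)
    while True:
        ret, frame = video_capture.read()
        if not ret:
            break
        result = dududu.recognize(frame)
        # recognize() returns None when no face is found, so fall back to the raw frame
        cv2.imshow('Video', frame if result is None else result)
        if cv2.waitKey(20) & 0xFF == ord('q'):
            break
    video_capture.release()
    cv2.destroyAllWindows()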