python dlib实现小程序(2)实现人脸检测
数据集产生:
# coding=utf-8
import dlib
import numpy as np
import cv2
import os
import json
# Build the face data set: one 128-value descriptor per detected face, labelled
# with the part of the image file name that precedes the first underscore.
# Outputs: facedata.txt (one descriptor per row) and label.txt (JSON list).
current_path = os.getcwd()

# os.path.join replaces the original "\m...", "\s..." and "\d..." literals,
# which contained invalid escape sequences and Windows-only separators.
detector = dlib.cnn_face_detection_model_v1(
    os.path.join(current_path, "mmod_human_face_detector.dat"))
sp = dlib.shape_predictor(
    os.path.join(current_path, "shape_predictor_68_face_landmarks.dat"))
facerec = dlib.face_recognition_model_v1(
    os.path.join(current_path, "dlib_face_recognition_resnet_model_v1.dat"))

imagepath = "E:/code/dlib/data/"
descriptors = []  # collected as a list; stacked once after the loop
label = []

for file in os.listdir(imagepath):
    if '.jpg' not in file and '.png' not in file:
        continue
    labelname = file.split('_')[0]

    img = cv2.imread(imagepath + file)
    if img is None:
        # cv2.imread returns None instead of raising when a file is unreadable.
        continue

    # Halve large images before running the detector on them.
    if img.shape[0] * img.shape[1] > 500000:
        img = cv2.resize(img, (0, 0), fx=0.5, fy=0.5)

    # NOTE(review): the image is passed to dlib in OpenCV's channel order; the
    # recognition script does the same, so both sides stay consistent.
    for d in detector(img, 1):
        rec = dlib.rectangle(d.rect.left(), d.rect.top(),
                             d.rect.right(), d.rect.bottom())
        shape = sp(img, rec)
        face_descriptor = facerec.compute_face_descriptor(img, shape)
        descriptors.append(np.array(face_descriptor).reshape(128))
        label.append(labelname)

# reshape(-1, 128) keeps the array two-dimensional even when no face was found.
data = np.array(descriptors, dtype=float).reshape((-1, 128))
np.savetxt('facedata.txt', data, fmt='%f')
with open('label.txt', 'w') as labelfile:
    json.dump(label, labelfile)
我的照片文件名的格式为:标签名+下划线+序号+点号+后缀名(例如 tom_1.jpg)。
上面代码中用到的模型文件可以到这里下载:http://dlib.net/files/。detector是使用卷积神经网络(CNN)进行人脸检测的检测算子;变量sp是特征点预测算子,用来获取人脸区域中五官的几何特征点,这里加载的是68特征点的landmark模型;facerec加载的是ResNet人脸识别模型,它是He Kaiming(2009年和2016年CVPR best paper作者)提出的方法的一个实现。这里已经给出了训练好的模型,因此不需要自己手动去训练。
实现模块:
# coding=utf-8
import dlib
import numpy as np
import cv2
import os
import json
# Load the dlib models from the current working directory.
current_path = os.getcwd()

# os.path.join replaces the original "\m...", "\s..." and "\d..." literals,
# which contained invalid escape sequences and Windows-only separators.
# dlib.get_frontal_face_detector() was left by the author as an alternative
# detector; note that the code below reads `d.rect`, as returned by the CNN
# detector used here.
detector = dlib.cnn_face_detection_model_v1(
    os.path.join(current_path, "mmod_human_face_detector.dat"))
sp = dlib.shape_predictor(
    os.path.join(current_path, "shape_predictor_68_face_landmarks.dat"))
facerec = dlib.face_recognition_model_v1(
    os.path.join(current_path, "dlib_face_recognition_resnet_model_v1.dat"))

# Largest descriptor distance still accepted as a match; anything farther is
# reported as 'other'.
threshold = 0.54
def findnearestclassforimage(face_descriptor, facelabel1,
                             descriptors=None, max_distance=None):
    """Return the label of the stored descriptor nearest to *face_descriptor*.

    Args:
        face_descriptor: Descriptor of the face to classify; any sequence that
            numpy can convert to a flat float array.
        facelabel1: Labels, one per stored descriptor, in the same order as
            the rows of the descriptor matrix.
        descriptors: Stored descriptors, one per row. Defaults to the
            module-level ``data``.
        max_distance: Largest Euclidean distance accepted as a match.
            Defaults to the module-level ``threshold``.

    Returns:
        The label of the nearest stored descriptor, or ``'other'`` when the
        nearest one is farther than ``max_distance`` or nothing is stored.
    """
    if descriptors is None:
        descriptors = data
    if max_distance is None:
        max_distance = threshold

    # atleast_2d: a file holding a single descriptor loads as a 1-D array,
    # which would otherwise break the axis=1 reduction below.
    gallery = np.atleast_2d(np.asarray(descriptors, dtype=float))
    # Flatten explicitly so the subtraction broadcasts row by row.
    query = np.asarray(face_descriptor, dtype=float).reshape(-1)

    if gallery.size == 0 or len(facelabel1) == 0:
        return 'other'

    distances = np.linalg.norm(gallery - query, axis=1)
    index = int(np.argmin(distances))
    if distances[index] > max_distance:
        return 'other'
    return facelabel1[index]
def recignition(img):
    """Detect faces in *img*, label each one, and show the annotated image.

    Every detected face gets a green box and the label returned by
    ``findnearestclassforimage``; the image is drawn on in place and then
    displayed in the window named 'image'.
    """
    green = (0, 255, 0)
    for detection in detector(img, 1):
        box = detection.rect
        rec = dlib.rectangle(box.left(), box.top(), box.right(), box.bottom())

        landmarks = sp(img, rec)
        descriptor = facerec.compute_face_descriptor(img, landmarks)
        class_pre = findnearestclassforimage(descriptor, label)

        # The box starts 10 px below the detection's top edge; the label text
        # is anchored at the top edge itself.
        top_left = (rec.left(), rec.top() + 10)
        bottom_right = (rec.right(), rec.bottom())
        cv2.rectangle(img, top_left, bottom_right, green, 2)
        cv2.putText(img, class_pre, (rec.left(), rec.top()),
                    cv2.FONT_HERSHEY_SCRIPT_SIMPLEX, 0.7, green, 2,
                    cv2.LINE_AA)
    cv2.imshow('image', img)
# Load the labels and descriptors written by the data-set script.
with open('label.txt', 'r') as labelFile:
    label = json.load(labelFile)

# ndmin=2 keeps `data` two-dimensional even when facedata.txt holds a single
# descriptor; without it loadtxt returns a 1-D array and the axis=1 distance
# computation in findnearestclassforimage fails.
data = np.loadtxt('facedata.txt', dtype=float, ndmin=2)

# Recognise faces frame by frame from the default camera until 'q' is pressed.
cap = cv2.VideoCapture(0)
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera missing or stream ended).
            break
        recignition(frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even if recognition raised.
    cap.release()
    cv2.destroyAllWindows()
其中 temp = face_descriptor - data,face_descriptor.shape 是 (128,1)【dlib.face_recognition_model_v1 输出的是128维的人脸特征向量】,data.shape 是 (5,128),两者相减是可行的。但是笔者用 numpy 数组测试了同样的形状:
# Stand-alone check of the same subtraction using plain numpy arrays.
a =np.ones(128).reshape((128,1))
print(a.shape)
b = np.ones(640).reshape((5,128))
print(b.shape)
# Shapes (128,1) and (5,128) are not broadcast-compatible, so the next line
# raises ValueError.
c = a - b
运行时会报错:ValueError: operands could not be broadcast together with shapes (128,1) (5,128)
笔者又查询了 face_descriptor 的 type,查到是 dlib.vector。笔者猜测是这个类型可以在做减法的时候进行广播。(更可能的原因是:numpy 在运算时把 dlib.vector 当作长度为128的一维序列,转换成形状为 (128,) 的数组,因此可以与 (5,128) 的数组广播。)
原po:https://www.cnblogs.com/supersayajin/p/8489435.html