MTCNN + FaceNet: real-time face recognition from a single photo

This is my own code, so I won't walk through every line; read it if you're interested. It works as-is: just create a folder for each person, put one photo of that person inside, and you can run real-time face recognition. The GitHub link is at the end, and the repo's README gives the download address for the FaceNet model. The pipeline is: build the MTCNN networks, use MTCNN to detect and crop the face, pass the crop to FaceNet to get a 128-dimensional feature vector, then check the Euclidean distance between vectors to decide whether two faces belong to the same person. That's all the explanation I'll give.
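For reference, the training script below reads images from ./train_dir/, one sub-folder per person, so the minimal layout looks like this (the folder and file names here are only placeholders):

train_dir/
    alice/
        alice.jpg
    bob/
        bob.jpg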
First, run the code in untitled:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import cv2
import csv
from os.path import join as pjoin
import matplotlib.pyplot as plt

import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.externals import joblib

from scipy import misc
import tensorflow as tf
import numpy as np
import sys
import os
import copy
import argparse
import facenet
import align.detect_face

minsize = 20                  # minimum size of face
threshold = [0.6, 0.7, 0.7]   # thresholds for the three MTCNN stages
factor = 0.709                # scale factor for the image pyramid

Create the MTCNN networks and load their parameters:

print('Creating networks and loading parameters')
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

def load_and_align_data(image, image_size, margin, gpu_memory_fraction):

    # The input image
    img = image
    # Image height and width
    img_size = np.asarray(img.shape)[0:2]
    # Run MTCNN: arguments are the input image, the minimum face size, the three
    # networks, the per-stage thresholds, and the image-pyramid scale factor
    bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)

    # If no face is detected, return immediately with flag 0 (skip this image)
    if len(bounding_boxes) < 1:
        return 0, 0, 0
    else:
        crop = []
        det = bounding_boxes

        det[:, 0] = np.maximum(det[:, 0], 0)
        det[:, 1] = np.maximum(det[:, 1], 0)
        det[:, 2] = np.minimum(det[:, 2], img_size[1])
        det[:, 3] = np.minimum(det[:, 3], img_size[0])

        # det[:, 0] = np.maximum(det[:, 0] - margin / 2, 0)
        # det[:, 1] = np.maximum(det[:, 1] - margin / 2, 0)
        # det[:, 2] = np.minimum(det[:, 2] + margin / 2, img_size[1])
        # det[:, 3] = np.minimum(det[:, 3] + margin / 2, img_size[0])

        det = det.astype(int)

        for i in range(len(bounding_boxes)):
            temp_crop = img[det[i, 1]:det[i, 3], det[i, 0]:det[i, 2], :]
            aligned = misc.imresize(temp_crop, (image_size, image_size), interp='bilinear')
            prewhitened = facenet.prewhiten(aligned)
            crop.append(prewhitened)
        crop_image = np.stack(crop)

        return det, crop_image, 1

# np.squeeze() removes a dimension of size 1; it fails if that dimension is not 1
# det = np.squeeze(bounding_boxes[0, 0:4])

def to_rgb(img):
    w, h = img.shape
    ret = np.empty((w, h, 3), dtype=np.uint8)
    ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img
    return ret

def load_data(data_dir):

    # data is a dict: key = person (sub-folder name), value = list of that person's images (ndarrays)
    data = {}
    pics_ctr = 0
    for guy in os.listdir(data_dir):
        person_dir = pjoin(data_dir, guy)
        curr_pics = [read_img(person_dir, f) for f in os.listdir(person_dir)]

        # Store all images from this person's folder
        data[guy] = curr_pics
    return data

def read_img(person_dir, f):
    img = cv2.imread(pjoin(person_dir, f))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Check the number of dimensions
    if gray.ndim == 2:
        img = to_rgb(gray)
    return img

Model location:

model_dir = './20170512-110547'
with tf.Graph().as_default():
    with tf.Session() as sess:
        # Load the FaceNet model
        facenet.load_model(model_dir)

        # Get the tensors by name
        images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
        embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
        phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")

        # Load the images from the training folder and crop them, then embed them; data is a dict
        data = load_data('./train_dir/')

        # keys holds the class names (one folder per person)
        keys = []
        for key in data:
            keys.append(key)
            print('folder:{},image numbers:{}'.format(key, len(data[key])))

        # Use MTCNN to find the faces in each image, then store the FaceNet embeddings
        for n in range(len(keys)):
            for x in data[keys[n]]:
                _, images_me, i = load_and_align_data(x, 160, 44, 1.0)
                if i:
                    feed_dict = {images_placeholder: images_me, phase_train_placeholder: False}
                    emb = sess.run(embeddings, feed_dict=feed_dict)
                    for xx in range(len(emb)):
                        # Write one row per detected face: 128 values plus the person's label
                        emb_row = list(emb[xx, :])
                        emb_row.append(keys[n])
                        with open('face_feature.csv', "a+", newline="") as csvfile:
                            writer = csv.writer(csvfile)
                            writer.writerow(emb_row)

This first script uses MTCNN and FaceNet to extract a 128-dimensional feature vector for each known face and appends it, together with the person's folder name, to face_feature.csv.
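Each row of face_feature.csv therefore holds the 128 embedding values followed by the label. A minimal sketch for reading one row back and checking it (the file name comes from the script above; the 128-dimensional size is the embedding size of the 20170512-110547 model):

import csv

with open('face_feature.csv', newline='') as f:
    row = next(csv.reader(f))
embedding = [float(v) for v in row[:-1]]   # the 128 embedding values
label = row[-1]                            # the person's folder name
print(len(embedding), label)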
Next, run the code in untitled1:
import pandas as pd

path_feature_known_csv = "./face_feature.csv"
csv_rd = pd.read_csv(path_feature_known_csv, header=None, encoding='gbk')
features_known_arr = []
for i in range(csv_rd.shape[0]):
    features_someone_arr = []
    # .ix has been removed from pandas, so use .iloc for positional indexing
    for j in range(0, csv_rd.shape[1]):
        features_someone_arr.append(csv_rd.iloc[i, j])
    # print(features_someone_arr)
    features_known_arr.append(features_someone_arr)
print("Number of faces in the database:", len(features_known_arr))


from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import cv2
from scipy import misc
import tensorflow as tf
import numpy as np
import sys
import os
import copy
import argparse
import facenet
import align.detect_face
import random

from os.path import join as pjoin
import matplotlib.pyplot as plt

import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.externals import joblib

def return_euclidean_distance(feature_1, feature_2):
    feature_1 = np.array(feature_1)
    feature_2 = np.array(feature_2)
    dist = np.sqrt(np.sum(np.square(feature_1 - feature_2)))
    print("Euclidean distance: ", dist)

    # Distances above 0.8 are treated as different people
    if dist > 0.8:
        return "diff"
    else:
        return "same"
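As a quick sanity check of the 0.8 threshold, here is a small example with two made-up 128-dimensional vectors (not real FaceNet embeddings):

a = np.zeros(128)
b = np.full(128, 0.1)
print(return_euclidean_distance(a, b))   # distance ≈ 1.13 > 0.8, so "diff"
print(return_euclidean_distance(a, a))   # distance 0.0, so "same"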

# Face detection parameters
minsize = 20                  # minimum size of face
threshold = [0.6, 0.7, 0.7]   # thresholds for the three MTCNN stages
factor = 0.709                # scale factor for the image pyramid

Model location:

model_dir = './20170512-110547'   # directory containing the graph definition and checkpoint files

def to_rgb(img):
    w, h = img.shape
    ret = np.empty((w, h, 3), dtype=np.uint8)
    ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img
    return ret

def read_img(person_dir, f):
    img = cv2.imread(pjoin(person_dir, f))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Check the number of dimensions
    if gray.ndim == 2:
        img = to_rgb(gray)
    return img

def load_data(data_dir):
    data = {}
    pics_ctr = 0
    for guy in os.listdir(data_dir):
        person_dir = pjoin(data_dir, guy)
        curr_pics = [read_img(person_dir, f) for f in os.listdir(person_dir)]
        # Store all images from this person's folder
        data[guy] = curr_pics
    return data

minsize = 20                  # minimum size of face
threshold = [0.6, 0.7, 0.7]   # thresholds for the three MTCNN stages
factor = 0.709                # scale factor for the image pyramid

Create the MTCNN networks and load their parameters:

print('Creating networks and loading parameters')
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

def load_and_align_data(image, image_size, margin, gpu_memory_fraction):

    # The input image
    img = image
    # Image height and width
    img_size = np.asarray(img.shape)[0:2]
    # Run MTCNN: arguments are the input image, the minimum face size, the three
    # networks, the per-stage thresholds, and the image-pyramid scale factor
    bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)

    if len(bounding_boxes) < 1:
        return 0, 0, 0
    else:
        crop = []
        det = bounding_boxes

        det[:, 0] = np.maximum(det[:, 0], 0)
        det[:, 1] = np.maximum(det[:, 1], 0)
        det[:, 2] = np.minimum(det[:, 2], img_size[1])
        det[:, 3] = np.minimum(det[:, 3], img_size[0])

        det = det.astype(int)

        for i in range(len(bounding_boxes)):
            temp_crop = img[det[i, 1]:det[i, 3], det[i, 0]:det[i, 2], :]
            aligned = misc.imresize(temp_crop, (image_size, image_size), interp='bilinear')
            prewhitened = facenet.prewhiten(aligned)
            crop.append(prewhitened)
        crop_image = np.stack(crop)

        return det, crop_image, 1

with tf.Graph().as_default():
    with tf.Session() as sess:
        # Load the FaceNet model
        facenet.load_model(model_dir)

        print('Building the FaceNet embedding model')
        # Get the tensors by name
        images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
        embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
        phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")

        # Open the IP camera
        video = "http://admin:[email protected]:8081/"   # change the IPv4 address after the @ to your own
        # Pass 0 to open the built-in webcam, or a file path to open a video file
        capture = cv2.VideoCapture(video)
        cv2.namedWindow("camera", 1)
        c = 0
        num = 0
        frame_interval = 3   # process every frame_interval-th frame
        while True:
            ret, frame = capture.read()
            timeF = frame_interval

            # print(shape(frame))
            detect_face = []

            if (c % timeF == 0):
                find_results = []
                # cv2.imshow("camera", frame)

                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

                if gray.ndim == 2:
                    img = to_rgb(gray)
                det, crop_image, j = load_and_align_data(img, 160, 44, 1.0)
                if j:
                    feed_dict = {images_placeholder: crop_image, phase_train_placeholder: False}
                    emb = sess.run(embeddings, feed_dict=feed_dict)
                    emb = list(emb[0, :])
                    # Compare against every known face; stop at the first match
                    for i in range(len(features_known_arr)):
                        compare = return_euclidean_distance(emb, features_known_arr[i][0:-1])
                        if compare == "same":
                            result = features_known_arr[i][-1]
                            print("result:", result)
                            break
                        else:
                            result = "unknown"
                    # Draw the bounding boxes and label them
                    for rec_position in range(len(det)):

                        cv2.rectangle(frame,
                                      (det[rec_position, 0], det[rec_position, 1]),
                                      (det[rec_position, 2], det[rec_position, 3]),
                                      (0, 255, 0), 2, 8, 0)

                        cv2.putText(frame,
                                    result,
                                    (det[rec_position, 0], det[rec_position, 1]),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                    0.8,
                                    (0, 0, 255),
                                    thickness=2,
                                    lineType=2)
                cv2.imshow('camera', frame)

            c += 1

            key = cv2.waitKey(3)

            if key == 27:
                # Exit on the Esc key
                print("esc break...")
                break

            if key == ord(' '):
                # Save a frame as an image
                num = num + 1
                filename = "frames_%s.jpg" % num
                cv2.imwrite(filename, frame)

        # When everything is done, release the capture
        capture.release()
        cv2.destroyWindow("camera")
