因为要给自己的照片分类,萌生了一个通过人脸识别来归类的想法。虽然现在的网络相册都提供了该功能,但是就当是学习了,也算贴近一下AI,现在不是流行AI嘛。本来计划是自己写,网上的教程也多如牛毛,结果写到一半的时候,发现了一个开源库face_recognition,据说识别准确率达到98%。且不管这个准确率,能拿来用就可以。具体的安装过程,教程也有很多,可以自行查找。因为采用了第三方库,所以核心功能调用对应方法即可,代码大部分是处理识别前和识别后的工作。期间看到一个大神写的关于人脸识别的文章,贴在此,供个人参考(https://www.cnblogs.com/neo-T/p/6432596.html)。
本文中face_recognition.face_locations采用model = "cnn"的模式,该模式消耗机器内存和CPU非常大,所以在识别前对图片做了处理:先生成缩略图作为临时文件,再对缩略图进行识别。尽管如此,100张图片也用了12小时,所以机器配置不够的话慎用。也可以采用默认的model(hog)。
需要先准备样本数据,即你想找的人的图片,最好是单人正面照。我放置了10个样本,其中有部分侧面的和相对模糊的,对比识别时有些好像也是可以识别出来的。还有阈值,要多调试几次,找出适合自己的阈值。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2019-07-10 18:10
# @Author : yy
# @File : CNNImage.py
import os
import cv2
import face_recognition
from PIL import Image
import numpy as np
import exifread
import shutil
import time
class Imagetags():
    """Sort photos and videos into dated folders using face recognition.

    Every file found under ``Pic`` is moved to
    ``Photo/<YYYY_MM>/<label>``, where the label is ``'MP4'`` for videos,
    ``'Zhou'`` for pictures containing a face that matches the samples in
    ``SampPath``, and ``'Other'`` for everything else.
    """

    # Folder name used when no shooting date can be derived for a file.
    UnknownDate = 'Unknown'
    # Tolerance handed to face_recognition.compare_faces; a smaller value
    # makes the comparison stricter.
    Tolerance = 0.4
    # Image modes that can be written as PNG without conversion.
    _PngModes = ('1', 'L', 'LA', 'I', 'I;16', 'P', 'RGB', 'RGBA')

    def __init__(self, SampPath=r'E:\Sample', Pic=r'E:\Pic',
                 Photo=r'E:\Photo', TmpPath=r'E:\PicTmp'):
        """Store the working folders.

        Args:
            SampPath: folder holding sample pictures of the wanted person.
            Pic: folder whose files are going to be sorted.
            Photo: root folder the sorted files are moved into.
            TmpPath: folder used for temporary thumbnails.
        """
        self.SampPath = SampPath
        self.Pic = Pic
        self.Photo = Photo
        self.TmpPath = TmpPath

    def ResizeImage(self, path):
        """Read the image at *path* with OpenCV and return it at half size."""
        ImgTmp = cv2.imread(path)
        Img = cv2.resize(ImgTmp, (0, 0), fx=0.5, fy=0.5,
                         interpolation=cv2.INTER_CUBIC)
        return Img

    def PubToCnnFile(self, ImgPath):
        """Write a PNG thumbnail (at most 640x640) of *ImgPath*.

        The thumbnail is stored in ``self.TmpPath``, which is created when
        it does not exist yet.

        Returns:
            The path of the thumbnail file.
        """
        os.makedirs(self.TmpPath, exist_ok=True)
        Stem = os.path.splitext(os.path.basename(ImgPath))[0]
        PicTmpPath = os.path.join(self.TmpPath, Stem + '_tmp' + '.png')
        # The context manager closes the source file even when saving fails.
        with Image.open(ImgPath) as Img:
            # Upper bound of the thumbnail size; the aspect ratio is kept.
            Img.thumbnail((640, 640))
            # PNG cannot store e.g. CMYK data, so convert such images first.
            Thumb = Img if Img.mode in self._PngModes else Img.convert('RGB')
            Thumb.save(PicTmpPath, 'PNG')
        return PicTmpPath

    def GetImageCodes(self, ImgPath):
        """Return the face encodings of every face found in *ImgPath*.

        The picture is shrunk to a temporary thumbnail first because the
        "cnn" detection model is very expensive on full-size images. The
        thumbnail is deleted again before returning.

        Returns:
            A list with one encoding per detected face (possibly empty).
        """
        PicTmpPath = self.PubToCnnFile(ImgPath)
        try:
            img = face_recognition.load_image_file(PicTmpPath)
            # The default model is much cheaper; "cnn" is used here for its
            # higher detection rate.
            face_locations = face_recognition.face_locations(img, model="cnn")
            # A picture may contain several faces.
            return face_recognition.face_encodings(img, face_locations)
        finally:
            # Do not let thumbnails pile up in the temporary folder.
            try:
                os.remove(PicTmpPath)
            except OSError as err:
                print('Could not remove temporary file:', PicTmpPath, err)

    def GetSampleImgCodes(self):
        """Collect the face encodings of all pictures below ``self.SampPath``.

        Returns:
            A flat list of encodings, one per face found in the samples.
        """
        SampleCodeList = []
        for dirpath, dirnames, filenames in os.walk(self.SampPath):
            for item in filenames:
                SampleImgPath = os.path.join(dirpath, item)
                print(SampleImgPath)
                SampleCodeList.extend(self.GetImageCodes(SampleImgPath))
        print(len(SampleCodeList))
        return SampleCodeList

    def PubMoveFile(self, ImgPath, InPath, item):
        """Move *ImgPath* into ``InPath/<YYYY_MM>/<item>``.

        The date part comes from the file name for videos and from EXIF
        data (falling back to the file name) for JPEG pictures. Any other
        file type, or a file without a usable date, goes to the
        ``UnknownDate`` folder instead of aborting the run.

        Returns:
            The directory the file was moved into.
        """
        FileName = os.path.basename(ImgPath).lower()
        if FileName.endswith('.mp4'):
            date = self.GetMp4EXIF(ImgPath)
        elif FileName.endswith(('.jpg', '.jpeg')):
            date = self.GetJpgEXIF(ImgPath)
        else:
            date = self.UnknownDate
        Datepath = os.path.join(InPath, date, item)
        os.makedirs(Datepath, exist_ok=True)
        shutil.move(ImgPath, Datepath)
        return Datepath

    def ComparePic(self, ImgPath, SampleCodeList):
        """Compare every face in *ImgPath* with the sample encodings.

        A face counts as a match when at least half of the samples accept
        it at ``self.Tolerance``. Without any samples nothing can match.

        Returns:
            A list with one bool per face found in the picture.
        """
        compareList = []
        for code in self.GetImageCodes(ImgPath):
            # One call compares the face against all samples at once.
            votes = face_recognition.compare_faces(
                SampleCodeList, code, tolerance=self.Tolerance)
            hits = sum(1 for vote in votes if vote)
            compareList.append(len(votes) > 0 and 2 * hits >= len(votes))
        return compareList

    def CompareImage(self):
        """Sort every file below ``self.Pic`` into ``self.Photo``.

        Videos are moved without analysis. Pictures are moved to 'Zhou'
        when one of their faces matches the samples, to 'Other' otherwise.
        Files that cannot be read as an image are reported and left in
        place so a single bad file does not abort a long run.
        """
        SampleCodeList = self.GetSampleImgCodes()
        print('样本数据量:', len(SampleCodeList))
        print('开始对比')
        starttime = time.time()
        # Counters: matched pictures, unmatched pictures, skipped files.
        x = 0
        y = 0
        skipped = 0
        for dirpath, dirnames, filenames in os.walk(self.Pic):
            for file in filenames:
                ImgPath = os.path.join(dirpath, file)
                if file.lower().endswith('.mp4'):
                    self.PubMoveFile(ImgPath, self.Photo, 'MP4')
                    continue
                try:
                    compareList = self.ComparePic(ImgPath, SampleCodeList)
                except OSError as err:
                    # PIL raises an OSError subclass for non-image files.
                    skipped = skipped + 1
                    print('跳过无法处理的文件:', ImgPath, err)
                    continue
                if any(compareList):
                    x = x + 1
                    GoPath = self.PubMoveFile(ImgPath, self.Photo, 'Zhou')
                    print('识别到样本数据人物:', GoPath)
                else:
                    y = y + 1
                    GoPath = self.PubMoveFile(ImgPath, self.Photo, 'Other')
                    # The face count helps when checking misses by hand.
                    print('未识别到样本数据人物:', GoPath, len(compareList))
        endtime = time.time()
        TTime = endtime - starttime
        print('用时:', TTime)
        print('识别到:', x, '未识别到:', y)
        if skipped:
            print('跳过:', skipped)

    def _DateFromName(self, FileName):
        """Return 'YYYY_MM' from a name like ``IMG_20190710_1810.jpg``.

        The date is expected in the first six characters after the first
        underscore. ``UnknownDate`` is returned when they are missing or
        are not a plausible year/month.
        """
        parts = FileName.split('_')
        if len(parts) > 1:
            stamp = parts[1][0:6]
            if len(stamp) == 6 and stamp.isdigit() and 1 <= int(stamp[4:6]) <= 12:
                return stamp[0:4] + '_' + stamp[4:6]
        return self.UnknownDate

    def GetJpgEXIF(self, imagepath):
        """Return the shooting month of a JPEG as 'YYYY_MM'.

        The EXIF ``DateTimeOriginal`` tag is preferred. Without it, names
        containing 'HBGC' or 'IMG' are parsed for a date. ``UnknownDate``
        is returned when neither source yields one.
        """
        with open(imagepath, 'rb') as Files:
            tags = exifread.process_file(Files)
        ExDate = 'EXIF DateTimeOriginal'
        if ExDate in tags:
            # The tag prints as 'YYYY:MM:DD HH:MM:SS'; keep year and month.
            datetmp = str(tags[ExDate]).split(' ')[0].split(':')
            return '_'.join(datetmp[0:2])
        file = os.path.basename(imagepath)
        if ('HBGC' in file) or ('IMG' in file):
            return self._DateFromName(file)
        return self.UnknownDate

    def GetMp4EXIF(self, file):
        """Return the recording month of a video as 'YYYY_MM'.

        Only the file name is inspected, never the directory part, so
        underscores in folder names cannot corrupt the result.
        """
        return self._DateFromName(os.path.basename(str(file)))
# Script entry point: build the sorter and process every file under its
# picture folder, moving each one into the sorted photo tree.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing
# this module also starts the run and moves files; confirm that is intended.
run = Imagetags()
run.CompareImage()