未完待续。。。
faster-rcnn的模型训练需要大量数据集,目前使用labelImg工具,需要人工一张一张标注,效率低。本文使用python编写了自动化图片标注和数据集生成工具(本文标注目标是人脸,大家可以根据需要训练其他目标判别模型,比如汽车、自行车等标注需要的图片数据集)。
使用该工具可以自动完成整个数据集的生成(图片标注和数据集文件),生成的数据集符合pascal voc格式,可以直接在faster-rcnn中训练模型。
效果展示
000182.jpg
本文只说明原理,提供参考,实际应用需考虑其他因素。
环境配置
win7
python3.6.3
tensorflow-gpu1.5(cuda_9.0.176_windows.exe,cudnn-7.0.5(其他版本报错,运行占用内存较多会异常终止))
keras2.1.4
注意:各软件之间版本之间存在适配问题。
实现步骤
一、准备
1.1 素材准备
标注的素材准备(1.视频(人脸较多)、2.文件夹(人物网站爬取或自己收集))
1.2.环境搭建
1.3.人脸检测模型选择
一般有以下可选
1.3.1 使用opencv 参考(点击跳转)
1.3.2 使用dlib 参考(点击跳转)
1.3.3 使用腾讯、百度或阿里(旷世科技Face++)等开发平台的的人脸检测API接口 参考(点击名称跳转)
1.3.4 自己训练人脸检测的模型(神经网络(SelectiveSearch + CNN)) 参考(点击跳转)
二、实现步骤(视频,本文选择《欢乐颂》电视剧视频)
2.1.打开视频,截取一帧视频(图片)
2.2 判断2.1中图片有是否有人脸(使用1.3.2 dlib人脸检测模型)
2.3 如果2.2中有人脸(没有人脸则跳转到2.1继续截取,直到视频结束),则获取人脸的区域保存该区域图片和人脸区域图片在2.1图片中的位置(左上角x(xmin),左上角y(ymin),右下角x(xmax),右下角y(ymax))保存到data\VOC2007\JPEGImages 中。
图片命名000001.jpg、000002.jpg以此递增。
同时data\VOC2007\Annotations中存放同名(后缀不同)文件0000001.xml。xml文件存放图片中人脸的类别和位置。图片和人脸数据的关系是1对多(一张图片中存在一个或多个人脸,无人脸图片已过滤)。
0000182.jpg(3个人脸)
0000182.xml(3个人脸对应3个object)
VOC2007
000182.jpg
uma_tyan
?
960
540
3
0
3
代码
set_face_dataset.py
建立Pascal VOC数据集文件结构、xml文件生成、数据集分割(训练集、测试集)。
count.txt 用于计数,初始化第一行写数字1
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import cv2
import random
from lxml import etree, objectify
from lxml.etree import Element, SubElement, tostring
base_dir = os.path.join(os.getcwd(), 'training_data\VOC2007')
jpg_dir = os.path.join(base_dir, 'JPEGImages')
ano_dir = os.path.join(base_dir, 'Annotations')
img_dir = os.path.join(base_dir, r'ImageSets\Main')
count_dir = os.path.join(base_dir, r'count.txt')
def setdir():
if not os.path.isdir(base_dir):
os.makedirs(base_dir)
if not os.path.isdir(jpg_dir):
os.makedirs(jpg_dir)
if not os.path.isdir(ano_dir):
os.makedirs(ano_dir)
if not os.path.isdir(img_dir):
os.makedirs(img_dir)
if not os.path.isdir(count_dir):
os.makedirs(count_dir)
# 计数文件初始化
with open(count_dir, 'w') as f:
f.writelines('1')
def make_xml(facelist, image_name, w, h):
(filepath, tempfilename) = os.path.split(image_name)
(filename, extension) = os.path.splitext(image_name)
annotation = etree.Element("annotation")
etree.SubElement(annotation, "folder").text = "VOC2007"
etree.SubElement(annotation, "filename").text = tempfilename
source = etree.SubElement(annotation, "source")
etree.SubElement(source, "database").text = "The VOC2007 Database"
etree.SubElement(source, "annotation").text = "PASCAL VOC2007"
etree.SubElement(source, "image").text = "flickr"
etree.SubElement(source, "flickrid").text = "340039936"
size = etree.SubElement(annotation, "owner")
etree.SubElement(size, "flickrid").text = 'uma_tyan' # 必须用string
etree.SubElement(size, "name").text = '?'
size = etree.SubElement(annotation, "size")
etree.SubElement(size, "width").text = str(w)
etree.SubElement(size, "height").text = str(h)
etree.SubElement(size, "depth").text = '3'
etree.SubElement(annotation, "segmented").text = '0'
list_len = len(facelist)
etree.SubElement(annotation, "object_num").text = str(list_len)
for i in range(list_len):
key_object = etree.SubElement(annotation, "object")
etree.SubElement(key_object, "name").text = "face"
etree.SubElement(key_object, "pose").text = "Unspecified"
etree.SubElement(key_object, "truncated").text = '0'
etree.SubElement(key_object, "difficult").text = '0'
bndbox = etree.SubElement(key_object, "bndbox")
etree.SubElement(bndbox, "xmin").text = str(facelist[i][0].left())
etree.SubElement(bndbox, "ymin").text = str(facelist[i][0].top())
etree.SubElement(bndbox, "xmax").text = str(facelist[i][0].right())
etree.SubElement(bndbox, "ymax").text = str(facelist[i][0].bottom())
doc = etree.ElementTree(annotation)
xml_name = os.path.join(filepath, filename + '.xml')
xml_name = xml_name.replace("JPEGImages","Annotations")
doc.write(open(xml_name, "wb"), encoding='utf-8',pretty_print=True)
return
# 文件后缀自动修改为".jpg"
def file_name(file_dir):
L=[]
for root, dirs, files in os.walk(file_dir):
for file in files:
if os.path.splitext(file)[1] == '.xml':
L.append(file[:-4])
return L
# 文件后缀自动修改为".jpg"
def file_name():
L=[]
for root, dirs, files in os.walk(ano_dir):
for file in files:
if os.path.splitext(file)[1] == '.xml':
L.append(file[:-4])
return L
# 记录分割
def file_seg(list):
list_len = len(list)
list_len_2 = int(list_len/2)
list_len_4 = int(list_len/4)
random.shuffle(list)
with open(os.path.join(img_dir,'trainval.txt'),'w') as f:
f.write('\n'.join(list[0:list_len_2]))
with open(os.path.join(img_dir,'test.txt'),'w') as f:
f.write('\n'.join(list[list_len_2:]))
with open(os.path.join(img_dir,'train.txt'),'w') as f:
f.write('\n'.join(list[0:list_len_4]))
with open(os.path.join(img_dir,'val.txt'),'w') as f:
f.write('\n'.join(list[list_len_4:list_len_2]))
return
def setfacedb(img, list):
with open(count_dir,'r') as f:
count = f.readline()
filetype = '.jpg'
Newdir = os.path.join(jpg_dir, str(count).zfill(6) + filetype)
print(Newdir)
cv2.imwrite(Newdir,img)
image= cv2.imread(Newdir)
h, w = image.shape[:2]
# 新建xml文件
try:
make_xml(list, Newdir, w, h)
except Exception as e:
print(e)
with open(count_dir,'w') as f:
f.write(str(int(count) + 1))
if __name__ == "__main__":
setdir()
video_face_sign_dir.py
自动化标记图片并整理到对应文件夹中
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import numpy as np
import sys
import time
import cv2
import dlib
from setfacedb import*
from keras.preprocessing import image as imagekeras
from keras.models import load_model
from PIL import Image, ImageDraw, ImageFont
from glob import glob
size = 150
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'keras_face_trained_model.h5'
# 类别编码转换为中文名称返回
def return_name(codelist):
names = ['樊胜美', '关雎尔', '邱莹莹']
for it in range(0, len(codelist), 1):
if int(codelist[it]) == 1.0:
return names[it]
# 类别编码转换为英文名称返回
def return_name_en(codelist):
names = ['fsm', 'gje', 'qyy']
for it in range(0, len(codelist), 1):
if int(codelist[it]) == 1.0:
return names[it]
# 返回目录下全部.jpg文件的列表
def get_image(image_path):
name = glob(image_path+"/*jpg")
return name
# 区分和标记视频中截图的人脸
def face_rec():
global image_ouput
model = load_model(os.path.join(save_dir, model_name))
# camera = cv2.VideoCapture("2.mp4") # 视频
# camera = cv2.VideoCapture(0) # 摄像头
# img_list = get_image(os.path.join(os.getcwd(), 'img'))
img_list = get_image(os.path.join(os.getcwd(), 'test1'))
for imgitem in img_list:
# read, img = camera.read()
img = cv2.imread(imgitem)
img1 = img
try:
# 未截取视频图片结束本次循环
if not (type(img1) is np.ndarray):
continue
gray_img = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY) # 图片转为灰度图
except:
print("Unexpected error:", sys.exc_info()[0])
break
# 使用detector进行人脸检测
# 使用dlib自带的frontal_face_detector作为我们的特征提取器
detector = dlib.get_frontal_face_detector()
dets = detector(gray_img, 1) # 提取截图中所有人脸
if len(dets) < 1: # 没有人脸,结束本次循环
continue
facelist = []
for i, d in enumerate(dets): # 依次区分截图中的人脸
x1 = d.top() if d.top() > 0 else 0
y1 = d.bottom() if d.bottom() > 0 else 0
x2 = d.left() if d.left() > 0 else 0
y2 = d.right() if d.right() > 0 else 0
face = img[x1:y1, x2:y2]
face = cv2.resize(face, (size, size))
x_input = np.expand_dims(face, axis=0)
prey = model.predict(x_input)
print(prey, 'prey')
facelist.append([d, return_name(prey[0])]) # 存储一张图中多张人脸坐标
setfacedb(img1,facelist)
cv2_im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # cv2和PIL中颜色的hex码的储存顺序不同
pil_im = Image.fromarray(cv2_im)
draw = ImageDraw.Draw(pil_im) # 括号中为需要打印的cqanvas,这里就是在图片上直接打印
font = ImageFont.truetype("simhei.ttf", 20, encoding="utf-8") # 第一个参数为字体文件路径,第二个为字体大小
cv2_char_img = cv2.cvtColor(np.array(pil_im), cv2.COLOR_RGB2BGR)
# 显示图片
cv2.imshow("camera", cv2_char_img)
cv2.destroyAllWindows()
if __name__ == "__main__":
face_rec() # 人脸目标自动化生成
file_seg(file_name()) # 生成txt文件
详细讲解
1、运行set_face_dataset.py 生成Pascal voc 文件结构(setdir())
2、运行video_face_sign_dir.py (先选择视频)生成数据(图片、标注、训练和验证数据(txt))
推荐阅读
https://saicoco.github.io/object-detection-4/
https://blog.csdn.net/hongxingabc/article/details/79039537
https://blog.csdn.net/yaoqi_isee/article/details/79254574
https://blog.csdn.net/sinat_30071459/article/details/50723212
https://www.cnblogs.com/zjutzz/p/5983160.html
https://github.com/endernewton/tf-faster-rcnn
https://github.com/yhenon/keras-frcnn
https://github.com/fizyr/keras-retinanet
https://blog.csdn.net/wyx100/article/details/81235208
https://blog.csdn.net/qq_32799915/article/details/79081659
https://tzutalin.github.io/labelImg/
完整项目下载
为方便没积分童鞋,请加企鹅,共享文件夹。
包括:代码、数据集合(图片)、已生成model、安装库文件等。
https://github.com/gbusr/ML/tree/master/facecnn
set_face_dataset.py和video_face_sign_dir.py存放到facecnn目录下,即可运行。
QQ(群:452205574)