基于TensorFlow Object Detection API进行迁移学习训练自己的人脸检测模型(一)

前言

已完成TensorFlow Object Detection API环境搭建,具体搭建过程请参照:

 

安装运行谷歌开源的TensorFlow Object Detection API视频物体识别系统

 

Ubuntu系统安装配置tensorflow开发环境

 

准备工作

下载训练数据和验证数据

香港中文大学(Chinese University of Hong Kong)有大量的标注图像数据集。WIDER FACE数据集是一个人脸检测基准数据集。我用labelImg(https://github.com/tzutalin/labelImg)来显示边框。所选的文本是人脸检测注释。

数据集下载地址:http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/

基于TensorFlow Object Detection API进行迁移学习训练自己的人脸检测模型(一)_第1张图片

labelImg安装命令

pip install labelImg

其他安装方式请参照上面提供的地址自行尝试

labelImg使用命令

labelImg

效果

基于TensorFlow Object Detection API进行迁移学习训练自己的人脸检测模型(一)_第2张图片

基于TensorFlow Object Detection API进行迁移学习训练自己的人脸检测模型(一)_第3张图片

数据集中的图片与XML标注文件一一对应

下载模型

模型下载地址:https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md

 

基于TensorFlow Object Detection API进行迁移学习训练自己的人脸检测模型(一)_第4张图片

我选择的模型是faster_rcnn_inception_v2_coco,下载地址是:

http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz

编写代码进行相关操作001_down_data.py

# -*- coding: utf-8 -*-
'''
结果集下载与模型下载
'''
import requests
import os
import shutil
# unzip the files
import zipfile
import tarfile


def download_file_from_google_drive(id, destination):
    """Download a shared Google Drive file and store it at ``destination``.

    A first request is sent with the file id.  If the response carries a
    cookie whose name starts with ``download_warning``, its value is sent
    back as the ``confirm`` parameter in a second request (presumably Google
    Drive's confirmation step for large files).  The final response body is
    streamed to disk in 32 KiB chunks.

    Args:
        id: Google Drive file id.  The name shadows the builtin but is kept
            so existing keyword callers continue to work.
        destination: path of the local file to create or overwrite.

    Raises:
        requests.HTTPError: if the final response has an error status.  The
            check happens before ``destination`` is opened, so an error page
            is never saved as if it were the archive.
    """
    URL = "https://docs.google.com/uc?export=download"
    CHUNK_SIZE = 32768

    def get_confirm_token(response):
        # Return the value of the first "download_warning*" cookie, if any.
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value
        return None

    def save_response_content(response, destination):
        # Stream the body to disk so the whole archive never sits in memory.
        with open(destination, "wb") as f:
            for chunk in response.iter_content(CHUNK_SIZE):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)

    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        token = get_confirm_token(response)

        if token:
            # Release the first (unused) streamed response before retrying.
            response.close()
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)

        try:
            response.raise_for_status()
            save_response_content(response, destination)
        finally:
            response.close()


# The script: everything is downloaded into ./data below the current
# working directory.
curr_path = os.getcwd()
models_path = os.path.join(curr_path, "data")

# Create the data folder only when it is missing.  Unlike a blanket
# "except Exception: pass", real failures (permissions, a regular file
# named "data", ...) are reported instead of surfacing later as confusing
# download errors.
if not os.path.isdir(models_path):
    os.makedirs(models_path)

print("files download start")

# (archive file name, progress message, Google Drive file id)
_dataset_archives = (
    ("train.zip", "downloading.. train.zip -- 1.47GB", "0B6eKvaijfFUDQUUwd21EckhUbWs"),
    ("val.zip", "downloading.. val.zip -- 362.8MB", "0B6eKvaijfFUDd3dIRmpvSk8tLUk"),
)

# Fetch each archive only when it is not already present in the data folder.
for _archive_name, _message, _drive_id in _dataset_archives:
    _archive_path = os.path.join(models_path, _archive_name)
    if not os.path.exists(_archive_path):
        print(_message)
        download_file_from_google_drive(_drive_id, _archive_path)

print("files download end")

print("files unzip start")

# (folder that marks an already extracted archive, archive file name)
_extraction_targets = (
    ("WIDER_train", "train.zip"),
    ("WIDER_val", "val.zip"),
)

# Extract an archive only when its marker folder is missing.
for _extracted_dir, _archive_name in _extraction_targets:
    if not os.path.exists(os.path.join(models_path, _extracted_dir)):
        with zipfile.ZipFile(os.path.join(models_path, _archive_name), "r") as zip_ref:
            zip_ref.extractall(models_path)

print("files unzip end")

print("annotation download start")

url = 'http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/bbx_annotation/wider_face_split.zip'
annotation_zip = os.path.join(models_path, "wider_face_split.zip")

# Download the annotation archive only once (matching the dataset archives)
# and refuse to store an HTTP error page as if it were the zip file.
if not os.path.exists(annotation_zip):
    r = requests.get(url)
    r.raise_for_status()
    with open(annotation_zip, "wb") as code:
        code.write(r.content)

# Extract only when the annotation folder is not there yet.
if not os.path.exists(os.path.join(models_path, "wider_face_split")):
    with zipfile.ZipFile(annotation_zip, "r") as zip_ref:
        zip_ref.extractall(models_path)
print("annotation download end")

# Pre-trained model, listed in the TensorFlow detection model zoo:
# https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
url = 'http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz'
filePath = os.path.join(models_path, "faster_rcnn_inception_v2_coco_2018_01_28.tar.gz")

# Download the archive only when it is missing.  The status is checked
# before the file is created so an error page is never saved as the archive,
# and the connection is released even if writing fails.
if not os.path.exists(filePath):
    response = requests.get(url, stream=True)
    try:
        response.raise_for_status()
        with open(filePath, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
    finally:
        response.close()

# extractall() without a target unpacks into the current working directory,
# so switch to the data folder first (the script stays there afterwards).
os.chdir(models_path)

# filePath always names a .tar.gz archive, so only the gzip reader is needed;
# the context manager closes the archive even when extraction fails.
with tarfile.open(filePath, "r:gz") as tar:
    tar.extractall()

print("done")

数据预处理

将WIDERFace转换为Pascal XML

首先,我们需要将人脸检测数据集转换为Pascal XML格式,因为Tensorflow和labelImg使用的格式与WIDER FACE的原始标注格式不同。这些人脸检测图像会被下载到WIDER_train文件夹中。我们将使用002_data-to-pascal-xml.py转换WIDER FACE数据,并将数据复制到另一个子文件夹中。我的电脑处理9263张图片大约需要5分钟。

002_data-to-pascal-xml.py

#!/usr/bin/env python3


"""
This script crawls over 9263 training images and 1873 items
On my Macbook pro this takes: 4 minutes

"""
import cv2
import os
import numpy as np
from glob import iglob # python 3.5 or newer
from shutil import copyfile


# The script: all paths below are resolved against the current working
# directory.
curr_path = os.getcwd()

# xml.etree.cElementTree was removed in Python 3.9; the plain ElementTree
# module provides the same API.
import xml.etree.ElementTree as ET

# settings
cnt = 0
# Positional arguments follow OpenCV's HOGDescriptor constructor:
# winSize, blockSize, blockStride, cellSize, nbins.
hog = cv2.HOGDescriptor((80, 80), (16, 16), (8,8), (8,8), 9)


def newXMLPASCALfile(imageheight, imagewidth, path, basename):
    """Build the skeleton of a Pascal-style annotation tree for one image.

    The tree has an ``annotation`` root (attribute ``verified="yes"``) with
    the children ``folder``, ``filename``, ``path``, ``source``, ``size`` and
    ``segmented`` in that order.  No ``object`` entries are added and nothing
    is written to disk.

    Args:
        imageheight: image height; stored as text in ``size/height``.
        imagewidth: image width; stored as text in ``size/width``.
        path: value stored in the ``path`` element.
        basename: value stored in the ``filename`` element.

    Returns:
        An ``ElementTree`` wrapping the new ``annotation`` root.
    """
    root = ET.Element("annotation", verified="yes")

    def add_child(parent, tag, text=None):
        # Append <tag> to parent, set its text and hand the new node back.
        node = ET.SubElement(parent, tag)
        node.text = text
        return node

    for tag, text in (("folder", "images"), ("filename", basename), ("path", path)):
        add_child(root, tag, text)

    add_child(add_child(root, "source"), "database", "test")

    size_node = add_child(root, "size")
    for tag, value in (("width", imagewidth), ("height", imageheight)):
        add_child(size_node, tag, str(value))
    add_child(size_node, "depth", "3")

    add_child(root, "segmented", "0")

    return ET.ElementTree(root)

def appendXMLPASCAL(curr_et_object,x1, y1, w, h, filename):
    """Append one ``face`` object to an annotation tree and write it to disk.

    The box is stored as corner coordinates: ``xmin``/``ymin`` are ``x1``/
    ``y1`` and ``xmax``/``ymax`` are ``x1 + w``/``y1 + h``.  The whole tree is
    (re)written on every call, so the XML file always contains all boxes
    appended so far.

    Args:
        curr_et_object: ``ElementTree`` whose root receives the new object.
        x1: left edge of the box.
        y1: top edge of the box.
        w: box width.
        h: box height.
        filename: path of the image; the XML is written next to it with the
            extension swapped for ``.xml``.  Surrounding whitespace is
            ignored.

    Returns:
        The same ``curr_et_object`` that was passed in.
    """
    et_object = ET.SubElement(curr_et_object.getroot(), "object")
    for tag, text in (
        ("name", "face"),
        ("pose", "Unspecified"),
        ("truncated", "0"),
        ("difficult", "0"),
    ):
        ET.SubElement(et_object, tag).text = text

    bndbox = ET.SubElement(et_object, "bndbox")
    for tag, value in (
        ("xmin", x1),
        ("ymin", y1),
        ("xmax", x1 + w),
        ("ymax", y1 + h),
    ):
        ET.SubElement(bndbox, tag).text = str(value)

    # Swap only the real extension.  A plain replace(".jpg", ".xml") also
    # rewrites ".jpg" inside directory names and, for images with another
    # extension (".jpeg", ".JPG"), leaves the path unchanged so the XML would
    # overwrite the image itself.
    xml_path = os.path.splitext(filename.strip())[0] + ".xml"
    curr_et_object.write(xml_path)
    return curr_et_object




def readAndWrite(bbx_gttxtPath):
    """Turn a WIDER FACE ground-truth listing into per-image Pascal XML files.

    The listing is processed line by line:

    * a line with a single field that is not purely numeric is an image path
      relative to the module-level ``Train_path``; the image is loaded and a
      fresh annotation tree is started for it;
    * a purely numeric single-field line is skipped (presumably the face
      count that follows each image path);
    * any other line is parsed as ten integers
      ``x1 y1 w h blur expression illumination invalid occlusion pose``.

    A box is kept only when it is not flagged invalid, has ``blur == 0`` and
    its longer side is at least 50 pixels.  Every kept box is printed and
    appended to the current tree via ``appendXMLPASCAL``, which writes the
    XML next to the image.

    Relies on the module-level names ``Train_path`` and ``hog``.

    Args:
        bbx_gttxtPath: path of the ground-truth text file to read.

    Raises:
        ValueError: if a box line does not hold exactly ten integers.
    """
    with open(bbx_gttxtPath, 'r') as f:
        curr_filename = ""
        curr_path = ""
        # None means "no usable image yet": box lines are skipped until an
        # image has been loaded successfully.
        curr_et_object = None
        img = None

        for line in f:
            # Whitespace splitting copes with trailing spaces, a missing
            # final newline and CRLF line endings alike.
            inp = line.split()
            if not inp:
                continue

            if len(inp) == 1:
                curr_img = inp[0]
                if curr_img.isdigit():
                    continue
                # Flag 2 is cv2.IMREAD_ANYDEPTH per the OpenCV documentation.
                img = cv2.imread(Train_path + '/' + curr_img, 2) # POSIX only
                if img is None:
                    # cv2.imread returns None instead of raising when the
                    # file is missing or unreadable; skip this image's boxes.
                    print("WARNING: could not read image, skipping: " + curr_img)
                    curr_et_object = None
                    continue
                curr_filename = curr_img.split("/")[1].strip()
                curr_path = os.path.join(Train_path, os.path.dirname(curr_img))
                curr_et_object = newXMLPASCALfile(img.shape[0],img.shape[1],curr_path, curr_filename )

            else:
                if curr_et_object is None:
                    continue
                x1, y1, w, h, blur, expression, illumination, invalid, occlusion, pose = [int(i) for i in inp]
                n = max(w,h)
                if invalid == 1 or blur > 0 or n < 50:
                    continue
                img2 = img[y1:y1+n, x1:x1+n]
                if img2.size == 0:
                    # The box lies outside the image; resizing an empty crop
                    # would raise inside OpenCV.
                    continue
                img3 = cv2.resize(img2, (80, 80))
                # The descriptor is only used for its length in the log line.
                vec = hog.compute(img3)

                fileNow = os.path.join(curr_path,curr_filename)
                print("{}: {} {} {} {}".format(len(vec),x1, y1, w, h) + " " + fileNow)

                curr_et_object = appendXMLPASCAL(curr_et_object,x1, y1, w, h, fileNow )


# ################################ TRAINING DATA 9263 ITEMS ##################################
# Write one Pascal XML next to every training image that has usable boxes.
Train_path = os.path.join(curr_path, "data", "WIDER_train", "images" )
bbx_gttxtPath = os.path.join(curr_path, "data", "wider_face_split", "wider_face_train_bbx_gt.txt" )
readAndWrite(bbx_gttxtPath)


# To folders:
to_xml_folder = os.path.join(curr_path, "data", "tf_wider_train", "annotations", "xmls" )
to_image_folder = os.path.join(curr_path, "data", "tf_wider_train", "images" )

# Create both target folders independently.  With a shared try/except an
# already existing XML folder made the first call raise, so the image folder
# was never created.
os.makedirs(to_xml_folder, exist_ok=True)
os.makedirs(to_image_folder, exist_ok=True)

rootdir_glob = Train_path + '/**/*' # Note the added asterisks # This will return absolute paths
file_list = [f for f in iglob(rootdir_glob, recursive=True) if os.path.isfile(f)]

train_annotations_index = os.path.join(curr_path, "data", "tf_wider_train", "annotations", "train.txt" )

# NOTE(review): the index is opened in append mode, so re-running the script
# adds every entry again.
with open(train_annotations_index, "a") as indexFile:
    for f in file_list:
        # Only files whose extension is .xml are annotations; a substring
        # test would also match ".xml" elsewhere in the path.
        if f.endswith(".xml"):
            print(f)
            stem = os.path.splitext(f)[0]
            copyfile(f, os.path.join(to_xml_folder, os.path.basename(f) ))
            img = stem + ".jpg"
            copyfile(img, os.path.join(to_image_folder, os.path.basename(img) ))
            indexFile.write(os.path.basename(stem) + "\n")


################################ VALIDATION DATA 1873 ITEMS ##################################

# Write one Pascal XML next to every validation image that has usable boxes.
# readAndWrite reads the module-level Train_path, so it is re-pointed at the
# validation images here.
Train_path = os.path.join(curr_path, "data", "WIDER_val", "images" )
bbx_gttxtPath = os.path.join(curr_path, "data", "wider_face_split", "wider_face_val_bbx_gt.txt" )
readAndWrite(bbx_gttxtPath)


# To folders:
to_xml_folder = os.path.join(curr_path, "data", "tf_wider_val", "annotations", "xmls" )
to_image_folder = os.path.join(curr_path, "data", "tf_wider_val", "images" )

# Create both target folders independently.  With a shared try/except an
# already existing XML folder made the first call raise, so the image folder
# was never created.
os.makedirs(to_xml_folder, exist_ok=True)
os.makedirs(to_image_folder, exist_ok=True)


rootdir_glob = Train_path + '/**/*' # Note the added asterisks # This will return absolute paths
file_list = [f for f in iglob(rootdir_glob, recursive=True) if os.path.isfile(f)]

train_annotations_index = os.path.join(curr_path, "data", "tf_wider_val", "annotations", "val.txt" )

# NOTE(review): the index is opened in append mode, so re-running the script
# adds every entry again.
with open(train_annotations_index, "a") as indexFile:
    for f in file_list:
        # Only files whose extension is .xml are annotations; a substring
        # test would also match ".xml" elsewhere in the path.
        if f.endswith(".xml"):
            print(f)
            stem = os.path.splitext(f)[0]
            copyfile(f, os.path.join(to_xml_folder, os.path.basename(f) ))
            img = stem + ".jpg"
            copyfile(img, os.path.join(to_image_folder, os.path.basename(img) ))
            indexFile.write(os.path.basename(stem) + "\n")

运行

python 002_data-to-pascal-xml.py

效果

基于TensorFlow Object Detection API进行迁移学习训练自己的人脸检测模型(一)_第5张图片

基于TensorFlow Object Detection API进行迁移学习训练自己的人脸检测模型(一)_第6张图片

 

创建Pascal XML到Tensorflow CSV的索引

当数据转换为Pascal XML时,索引已经被创建。通过训练和验证数据集,我们将这些文件作为输入来制作TFRecords。也可以用labelImg这样的工具来手动标记图像,并使用这个步骤在这里创建一个索引。

003_xml-to-csv.py

import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET

# source and credits:
# https://raw.githubusercontent.com/datitran/raccoon_dataset/master/xml_to_csv.py

def xml_to_csv(path):
    """Collect every annotated object from the XML files in ``path``.

    Each ``object`` element of each ``*.xml`` file becomes one row holding
    the image file name, the image width and height, the class name and the
    four bounding-box corners.  Elements are looked up by tag name, so the
    result does not depend on the order in which a tool wrote the children.
    Files without any ``object`` element contribute no rows.

    Args:
        path: directory that directly contains the XML annotation files.

    Returns:
        A ``pandas.DataFrame`` with the columns ``filename``, ``width``,
        ``height``, ``class``, ``xmin``, ``ymin``, ``xmax``, ``ymax``; it is
        empty (columns only) when no objects are found.
    """
    xml_list = []
    # glob order depends on the file system; sorting keeps the CSV stable
    # between runs and machines.
    for xml_file in sorted(glob.glob(path + '/*.xml')):
        root = ET.parse(xml_file).getroot()
        members = root.findall('object')
        if not members:
            continue

        filename = root.find('filename').text
        size = root.find('size')
        width = int(size.find('width').text)
        height = int(size.find('height').text)

        for member in members:
            bndbox = member.find('bndbox')
            xml_list.append((
                filename,
                width,
                height,
                member.find('name').text,
                int(bndbox.find('xmin').text),
                int(bndbox.find('ymin').text),
                int(bndbox.find('xmax').text),
                int(bndbox.find('ymax').text),
            ))

    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df


def train():
    """Write ``data/tf_wider_train/train.csv`` from the training XML files.

    The XML files are read from ``data/tf_wider_train/annotations/xmls``
    below the current working directory; the CSV is written without an
    index column.
    """
    dataset_dir = os.path.join(os.getcwd(), 'data', 'tf_wider_train')
    frame = xml_to_csv(os.path.join(dataset_dir, 'annotations', 'xmls'))
    frame.to_csv(os.path.join(dataset_dir, 'train.csv'), index=None)
    print('> tf_wider_train - Successfully converted xml to csv.')

def val():
    """Write ``data/tf_wider_val/val.csv`` from the validation XML files.

    The XML files are read from ``data/tf_wider_val/annotations/xmls`` below
    the current working directory; the CSV is written without an index
    column.
    """
    dataset_dir = os.path.join(os.getcwd(), 'data', 'tf_wider_val')
    frame = xml_to_csv(os.path.join(dataset_dir, 'annotations', 'xmls'))
    frame.to_csv(os.path.join(dataset_dir, 'val.csv'), index=None)
    print('> tf_wider_val -  Successfully converted xml to csv.')

# Convert both splits as soon as the script runs: training first, then
# validation.
train()
val()

运行

python 003_xml-to-csv.py

效果

基于TensorFlow Object Detection API进行迁移学习训练自己的人脸检测模型(一)_第7张图片

 

创建TFRecord文件

TFRecords文件是一个大型的二进制文件,该文件被读取以训练机器学习模型。在下一步中,该文件将被Tensorflow按顺序读取。训练和验证数据将被转换成二进制文件。

004_generate_tfrecord.py

"""
Usage:
  # From tensorflow/models/
  # Create train data:
  python3 004_generate_tfrecord.py --images_path=data/tf_wider_train/images --csv_input=data/tf_wider_train/train.csv  --output_path=data/train.record
  # creates 847.6MB train.record

  # Create test/validation data:
  python3 004_generate_tfrecord.py --images_path=data/tf_wider_val/images --csv_input=data/tf_wider_val/val.csv  --output_path=data/val.record
  # creates 213.1MB  val.record

  source without adjustments: https://raw.githubusercontent.com/datitran/raccoon_dataset/master/generate_tfrecord.py
"""

from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

import os
import io
import pandas as pd
import tensorflow as tf

from PIL import Image
from object_detection.utils import dataset_util # from path
from collections import namedtuple, OrderedDict # tf slim

# Command-line flags, declared through tf.app.flags.  All three default to
# the empty string.
flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
flags.DEFINE_string('images_path', '', 'Path to images_folder')

# Parsed flag values, read by main().
FLAGS = flags.FLAGS


# TO-DO replace this with label map
def class_text_to_int(row_label):
    """Map a class name to its integer label id.

    Only the class ``'face'`` is known; it maps to ``1``.

    Args:
        row_label: class name as stored in the CSV ``class`` column.

    Returns:
        The integer id of the class.

    Raises:
        ValueError: if ``row_label`` is not a known class.  Failing here
            names the offending label, instead of passing ``None`` on to the
            int64 feature where it fails with an unrelated error.
    """
    if row_label == 'face':
        return 1
    raise ValueError('Unknown class label: {!r}'.format(row_label))


def split(df, group):
    """Group the rows of ``df`` by the ``group`` column.

    Args:
        df: ``pandas.DataFrame`` holding one row per annotated object.
        group: name of the column to group by.

    Returns:
        A list of ``data(filename, object)`` named tuples, one per distinct
        group key: ``filename`` is the key and ``object`` is the sub-frame
        with that key's rows.
    """
    data = namedtuple('data', ['filename', 'object'])
    grouped = df.groupby(group)
    records = []
    for key in grouped.groups:
        records.append(data(key, grouped.get_group(key)))
    return records


def create_tf_example(group, path):
    """Build a ``tf.train.Example`` for one image and all of its boxes.

    The image is read from ``path`` joined with ``group.filename``.  Its
    width and height are taken from the decoded image itself and are used to
    scale every box corner to a fraction of the image size.

    Args:
        group: record with ``filename`` (image file name) and ``object``
            (frame with the columns ``xmin``, ``xmax``, ``ymin``, ``ymax``
            and ``class``), as produced by ``split``.
        path: directory that contains the image file.

    Returns:
        A ``tf.train.Example`` carrying the encoded image, its size and file
        name, and the box corners, class names and class ids.
    """
    image_file = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()

    # Only the size is needed, so the image is opened from the bytes already
    # in memory.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    rows = [row for _, row in group.object.iterrows()]
    xmins = [row['xmin'] / width for row in rows]
    xmaxs = [row['xmax'] / width for row in rows]
    ymins = [row['ymin'] / height for row in rows]
    ymaxs = [row['ymax'] / height for row in rows]
    classes_text = [row['class'].encode('utf8') for row in rows]
    classes = [class_text_to_int(row['class']) for row in rows]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))


def main(_):
    """Convert the CSV named by ``--csv_input`` into a TFRecord file.

    Rows are grouped per image file name; every group becomes one serialized
    example in the file named by ``--output_path``.  Images are looked up in
    ``--images_path`` relative to the current working directory.

    Args:
        _: unused positional argument supplied by ``tf.app.run``.
    """
    path = os.path.join(os.getcwd(), FLAGS.images_path)
    # Read and group the CSV before opening the writer, so a bad CSV does
    # not leave an open record file behind.
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    try:
        for group in grouped:
            tf_example = create_tf_example(group, path)
            writer.write(tf_example.SerializeToString())
    finally:
        # Close the writer even when an example fails, so it is not leaked.
        writer.close()

    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))


# Run only when executed as a script; tf.app.run() is the entry point that
# ends up calling main().
if __name__ == '__main__':
    tf.app.run()

TFRecord的训练数据(847.6 MB)

python 004_generate_tfrecord.py --images_path=data/tf_wider_train/images --csv_input=data/tf_wider_train/train.csv  --output_path=data/train.record

TFRecord 的验证数据(213.1MB)

python 004_generate_tfrecord.py --images_path=data/tf_wider_val/images --csv_input=data/tf_wider_val/val.csv  --output_path=data/val.record

至此数据预处理已全部完成,效果如下:

基于TensorFlow Object Detection API进行迁移学习训练自己的人脸检测模型(一)_第8张图片

你可能感兴趣的:(机器学习,TensorFlow,人脸识别,tensorflow)