“ 一个人如果不能学会遗忘,那将是很痛苦的事,别再自寻烦恼,快把痛苦的事给忘了吧!”
为了能够使用Object Detection API~
需要将数据集格式转化为.TFRecord再进行训练~
至于,
如何使用Tensorflow官方的Object Detection API
包括下载、依赖(protobuf等)安装、跑demo、训练自己的数据过程~
推荐以下三篇博文: 1. https://blog.csdn.net/rookie_wei/article/details/81143814
2.https://blog.csdn.net/rookie_wei/article/details/81210499
3.https://blog.csdn.net/rookie_wei/article/details/81275663
整个过程比较详细,可以参考~
本篇主要介绍如何将已标注好的数据集转化成Tensorflow通用的.TFRecord格式~
注意:本程序中的标签(含背景共6类)是按我自己的检测任务定义的,请根据自己的数据集修改!
#-*- coding=utf-8 -*-
# File Name: Create_TFRecord.py
# Author: HZ
# Created Time: 2018-06-06
import os
import sys
import random
import numpy as np
import tensorflow as tf
import xml.etree.ElementTree as ET #操作xml文件
# Label table with six entries (background plus five object classes);
# adapt it to the classes present in your own images.
VOC_LABELS = {
    'none': (0, 'Background'),
    'person': (1, 'Person'),
    'car': (2, 'Car'),
    'bus': (3, 'Bus'),
    'truck': (4, 'Truck'),
    'cyclist': (5, 'cyclist'),
}

# Folders holding the annotations and the images.
DIRECTORY_ANNOTATIONS = 'Annotations/'
DIRECTORY_IMAGES = 'JPEGImages/'

# Random seed.
RANDOM_SEED = 4242
# Helpers that build int64, float and bytes features.
def int64_feature(value):
    """Return a ``tf.train.Feature`` holding ``value`` as an ``Int64List``.

    A ``value`` that is not a ``list`` is first wrapped in a one-element list.
    """
    values = value if isinstance(value, list) else [value]
    int64_list = tf.train.Int64List(value=values)
    return tf.train.Feature(int64_list=int64_list)
def float_feature(value):
    """Return a ``tf.train.Feature`` holding ``value`` as a ``FloatList``.

    A ``value`` that is not a ``list`` is first wrapped in a one-element list.
    """
    values = value if isinstance(value, list) else [value]
    float_list = tf.train.FloatList(value=values)
    return tf.train.Feature(float_list=float_list)
def bytes_feature(value):
    """Return a ``tf.train.Feature`` holding ``value`` as a ``BytesList``.

    A ``value`` that is not a ``list`` is first wrapped in a one-element list.
    """
    values = value if isinstance(value, list) else [value]
    bytes_list = tf.train.BytesList(value=values)
    return tf.train.Feature(bytes_list=bytes_list)
# Image processing.
def _xml_int_flag(obj, tag):
    """Return the integer text of child ``tag`` of ``obj``, or 0 if absent/empty."""
    node = obj.find(tag)
    # An Element without children is falsy, so the node must be compared
    # against None explicitly instead of being tested for truthiness.
    if node is None or not node.text:
        return 0
    return int(node.text)


def _process_image(directory, name):
    """Read one image file and parse its XML annotation.

    Args:
        directory: Dataset root containing the ``DIRECTORY_IMAGES`` and
            ``DIRECTORY_ANNOTATIONS`` folders.
        name: File name without extension, shared by ``<name>.jpg`` and
            ``<name>.xml``.

    Returns:
        A tuple ``(image_data, shape, bboxes, labels, labels_text,
        difficult, truncated)``:
        ``image_data`` is the content of the image file read in binary mode;
        ``shape`` is ``[height, width, depth]`` taken from the XML ``size``
        element; ``bboxes`` is a list of ``(ymin, xmin, ymax, xmax)`` tuples
        divided by the image height/width; the remaining four lists hold one
        entry per kept object, in the same order as ``bboxes``.

    Raises:
        KeyError: If an object name is not a key of ``VOC_LABELS``.
    """
    # Read the image file; the context manager closes the handle.
    filename = os.path.join(directory, DIRECTORY_IMAGES, name + '.jpg')
    with tf.gfile.GFile(filename, 'rb') as image_file:
        image_data = image_file.read()

    # Read the XML annotation file.
    filename = os.path.join(directory, DIRECTORY_ANNOTATIONS, name + '.xml')
    tree = ET.parse(filename)
    root = tree.getroot()

    # Image shape.
    size = root.find('size')
    shape = [int(size.find('height').text),
             int(size.find('width').text),
             int(size.find('depth').text)]

    # Find annotations.
    bboxes = []
    labels = []
    labels_text = []
    difficult = []
    truncated = []
    for obj in root.findall('object'):
        bbox = obj.find('bndbox')
        ymin = float(bbox.find('ymin').text) / shape[0]
        xmin = float(bbox.find('xmin').text) / shape[1]
        ymax = float(bbox.find('ymax').text) / shape[0]
        xmax = float(bbox.find('xmax').text) / shape[1]

        # Validate the box before touching any list: a rejected box must drop
        # the whole object, otherwise labels and boxes get out of step.
        if not (abs(ymax - ymin) < 1 and abs(xmax - xmin) < 1):
            continue

        label = obj.find('name').text
        labels.append(int(VOC_LABELS[label][0]))
        labels_text.append(label.encode('ascii'))  # stored as ASCII bytes
        difficult.append(_xml_int_flag(obj, 'difficult'))
        truncated.append(_xml_int_flag(obj, 'truncated'))
        bboxes.append((ymin, xmin, ymax, xmax))
    return image_data, shape, bboxes, labels, labels_text, difficult, truncated
# Build the tf.train.Example for one image.
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    """Pack one image and its annotations into a ``tf.train.Example``.

    Each entry of ``bboxes`` must have four items, read in the order
    ``(ymin, xmin, ymax, xmax)``; they are split into four per-coordinate
    lists. ``shape`` is indexed as ``[height, width, channels]``.
    """
    ymin, xmin, ymax, xmax = [], [], [], []
    coordinate_columns = (ymin, xmin, ymax, xmax)
    for box in bboxes:
        assert len(box) == 4
        for column, coordinate in zip(coordinate_columns, box):
            column.append(coordinate)

    image_format = b'JPEG'
    feature_map = {
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data),
    }
    features = tf.train.Features(feature=feature_map)
    return tf.train.Example(features=features)
# Append one sample to a TFRecord writer.
def _add_to_tfrecord(dataset_dir, name, tfrecord_writer):
    """Process sample ``name`` from ``dataset_dir`` and write it out.

    The sample is loaded with ``_process_image``, converted with
    ``_convert_to_example`` and its serialized form is passed to
    ``tfrecord_writer.write``.
    """
    (image_data, shape, bboxes, labels, labels_text,
     difficult, truncated) = _process_image(dataset_dir, name)
    example = _convert_to_example(
        image_data, labels, labels_text, bboxes, shape, difficult, truncated)
    serialized = example.SerializeToString()
    tfrecord_writer.write(serialized)
#name为转化文件的前缀
def _get_output_filename(output_dir, name, idx):
return '%s/%s_%03d.tfrecord' % (output_dir, name, idx)
def run(dataset_dir, output_dir, name='voc_train', shuffling=False):
    """Convert the annotated samples under ``dataset_dir`` into one TFRecord file.

    Args:
        dataset_dir: Dataset root; annotation files are listed from its
            ``DIRECTORY_ANNOTATIONS`` sub-folder.
        output_dir: Folder receiving the TFRecord file; created if missing.
        name: Prefix of the output file name.
        shuffling: When true, the sample order is shuffled after seeding
            ``random`` with ``RANDOM_SEED``.

    All samples are written to a single file (index 0). No file is created
    when there is nothing to convert.
    """
    # Make sure the output folder exists before the writer opens a file in it.
    # (A missing dataset folder is left alone: creating an empty one would
    # only postpone the failure to the listing below.)
    if not tf.gfile.Exists(output_dir):
        tf.gfile.MakeDirs(output_dir)

    path = os.path.join(dataset_dir, DIRECTORY_ANNOTATIONS)
    # Each sample is identified by its .xml annotation; skip any other entry
    # that happens to live in the folder.
    filenames = sorted(
        entry for entry in os.listdir(path)
        if os.path.splitext(entry)[1].lower() == '.xml')

    # Shuffle the order when shuffling is true.
    if shuffling:
        random.seed(RANDOM_SEED)
        random.shuffle(filenames)

    if filenames:
        total = len(filenames)
        tf_filename = _get_output_filename(output_dir, name, 0)
        with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:
            for index, filename in enumerate(filenames):
                sys.stdout.write(' Converting image %d/%d \n' % (index + 1, total))
                sys.stdout.flush()
                # Strip the extension whatever its length.
                img_name = os.path.splitext(filename)[0]
                _add_to_tfrecord(dataset_dir, img_name, tfrecord_writer)
    print('\nFinished converting the Pascal VOC dataset!')
# Source dataset path, output path and output file-name prefix.
dataset_dir = "./VOC2007/"
output_dir = "./TFRecords"
name = "voc_train"
def main(_):
    """Run the conversion with the module-level settings; the argument is unused."""
    run(dataset_dir=dataset_dir, output_dir=output_dir, name=name)


if __name__ == '__main__':
    tf.app.run()
获得训练好的模型后,用于检测的demo.py如下:(较好)
#encoding:utf-8
import tensorflow as tf
import numpy as np
import os
from matplotlib import pyplot as plt
from PIL import Image
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_utils
# Directory of the downloaded model.
MODEL_DIR = 'object_detection/ssd_mobilenet_v1_coco_2018_01_28'

# Model file inside that directory.
MODEL_CHECK_FILE = os.path.join(MODEL_DIR, 'frozen_inference_graph.pb')

# Label map that goes with the dataset.
MODEL_LABEL_MAP = os.path.join('object_detection/data',
                               'mscoco_label_map.pbtxt')

# Number of classes in the dataset; open mscoco_label_map.pbtxt to check.
MODEL_NUM_CLASSES = 90

# Collect the sample image paths (image1.jpg .. image5.jpg) in a list.
PATH_TO_TEST_IMAGES_DIR = 'object_detection/test_images'
TEST_IMAGES_PATHS = [
    os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i))
    for i in range(1, 6)
]

# Size of the output figure, in inches.
IMAGE_SIZE = (12, 8)
tf.reset_default_graph()

# Read the frozen model and import it into the default graph.
_graph = tf.GraphDef()
with tf.gfile.GFile(MODEL_CHECK_FILE, 'rb') as fd:
    _graph.ParseFromString(fd.read())
tf.import_graph_def(_graph, name='')

# Load the COCO labels, turning mscoco_label_map.pbtxt into a mapping like
# {1: {'id': 1, 'name': u'person'}...90: {'id': 90, 'name': u'toothbrush'}}
label_map = label_map_util.load_labelmap(MODEL_LABEL_MAP)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=MODEL_NUM_CLASSES,
)
category_index = label_map_util.create_category_index(categories)
# Convert an image into a numpy array.
def load_image_into_numpy_array(image):
    """Return the pixels of ``image`` as a ``(height, width, 3)`` uint8 array.

    Args:
        image: An image object exposing ``mode``, ``size`` as
            ``(width, height)``, ``getdata()`` and ``convert()`` (the PIL
            image interface).

    Returns:
        A new ``numpy.ndarray`` of dtype ``uint8`` and shape
        ``(height, width, 3)``.
    """
    # The reshape below needs exactly three channels; grayscale, palette or
    # RGBA inputs would otherwise fail, so bring them to RGB first.
    if image.mode != 'RGB':
        image = image.convert('RGB')
    (im_width, im_height) = image.size
    pixels = np.array(image.getdata(), dtype=np.uint8)
    return pixels.reshape((im_height, im_width, 3))
# Run the detection inside the graph.
detection_graph = tf.get_default_graph()
with tf.Session(graph=detection_graph) as sess:
    # The tensors below belong to the model and do not depend on the image,
    # so they are looked up once instead of on every iteration. They keep
    # their own names so the numpy results never overwrite the handles.
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # All detection boxes.
    boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Confidence of each detection.
    scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
    # Class of each box.
    classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
    # Number of detections.
    num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')
    fetches = [boxes_tensor, scores_tensor, classes_tensor, num_detections_tensor]

    for image_path in TEST_IMAGES_PATHS:
        print(image_path)
        # Read the image and convert it to an array; the context manager
        # releases the file once the pixels have been copied.
        with Image.open(image_path) as image:
            image_np = load_image_into_numpy_array(image)
        # Add a leading dimension.
        image_np_expanded = np.expand_dims(image_np, axis=0)

        # Run the model.
        (boxes, scores, classes, num_detections) = sess.run(
            fetches, feed_dict={image_tensor: image_np_expanded})

        # Print the detection results.
        print(num_detections)
        print(boxes)
        print(classes)
        print(scores)

        # Draw the results onto the image array.
        vis_utils.visualize_boxes_and_labels_on_image_array(
            image_np,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8
        )

        # Display.
        plt.figure(figsize=IMAGE_SIZE)
        plt.imshow(image_np)
        plt.show()
恩,复习+巩固!
sweet~