Note: my dataset contains 1035 images in total. It was not produced by hand-labeling, so it does not include XML files.
Contents: the image files, train.csv, and test.csv (image path, box coordinates, class name, etc.).
Note: I built this dataset myself; it is for learning purposes only and must not be misused.
Link: https://pan.baidu.com/s/18FdxUHiLnD1B52Jpt4QC6g
Extraction code: zw6b
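For reference, the CSV columns follow the same layout that xml_csv.py (shown later) produces: filename, width, height, class, xmin, ymin, xmax, ymax. The row below is a made-up example; the class name and coordinates are illustrative only:

filename,width,height,class,xmin,ymin,xmax,ymax
eye_label/image/0002.png,1920,1080,eye,812,402,901,455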
My GPU environment:
CUDA 10.0
CUDNN 7.6.5
Set up the GPU environment yourself; reference: https://blog.csdn.net/m0_37872216/article/details/103136477
OPENCV 4.1.1
object-detection 0.1, installation reference: https://blog.csdn.net/qq_38641985/article/details/116779147
object-detection is the official detection library and includes examples; it is the core dependency this project needs in order to run.
Download: https://github.com/tensorflow/models
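For reference, the usual TF2 installation steps after cloning that repository are roughly the following (a sketch; protoc must already be installed, and the reference links above cover the details):

cd models/research
protoc object_detection/protos/*.proto --python_out=.
cp object_detection/packages/tf2/setup.py .
python -m pip install .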
config                         training configuration for both eyes
eye_label                      eye training dataset
model                          previously trained models (optional)
singer                         training configuration for a single eye class
test                           test images and videos
training                       pretrained model (SSD)
gennrate_tfrecord_v2.py        TF2: converts the CSV files into .record files
model_main_tf2.py              TF2 training script
exporter_main_v2.py            TF2: exports the trained model
TF-image-object-counting.py    TF2 object detection and counting on images
TF-image-od.py                 TF2 object detection on images
TF-video-object-counting.py    TF2 object detection and counting on video
TF-video-od.py                 TF2 object detection on video
Pretrained model download: https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md
This step can be skipped for this project: I have already obtained the object locations, so just download them.
Example output XML file (standard labelImg / PASCAL VOC layout; the <object> entries are omitted here):

<annotation>
    <folder>image</folder>
    <filename>0002</filename>
    <path>E:/Download/object_detection_training/training_dectetion_eye/eye_label/image/0002.png</path>
    <size>
        <width>1920</width>
        <height>1080</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
</annotation>
This step is extremely simple but also extremely tedious and time-consuming. For this project I have already annotated all 1035 images (not with this tool), so no XML files are included.
If you are interested, annotate a few dozen of your own images for practice.
train              training images and their corresponding XML files
test               test images and their corresponding XML files
xml_csv.py         run it to generate the two CSV files
train_labels.csv   generated by running xml_csv.py
test_labels.csv    generated by running xml_csv.py
# -*- coding:utf-8 -*-
import os
import glob
import random
import xml.etree.ElementTree as ET

import pandas as pd

root_file = os.path.dirname(__file__)


def xml_to_csv(path, style):
    """Parse every labelImg XML file under `path`, one row per annotated object."""
    xml_list = []
    xml_files = glob.glob(path + '/*.xml')
    random.shuffle(xml_files)
    for xml_file in xml_files:
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            value = (
                root_file + "/" + style + "/" + root.find('filename').text,
                int(root.find('size')[0].text),   # width
                int(root.find('size')[1].text),   # height
                member[0].text,                   # class name
                int(member[4][0].text),           # xmin
                int(member[4][1].text),           # ymin
                int(member[4][2].text),           # xmax
                int(member[4][3].text),           # ymax
            )
            print(value)
            xml_list.append(value)
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    return pd.DataFrame(xml_list, columns=column_name)


def main():
    xml_df = xml_to_csv('train/', 'train')
    xml_df_test = xml_to_csv('test/', 'test')
    xml_df.to_csv('train_labels.csv', index=None)
    xml_df_test.to_csv('test_labels.csv', index=None)
    print('Successfully converted xml to csv.')


main()
Note: the `value` block may need minor changes depending on the format of your XML files; see the sketch below.
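For instance, a drop-in replacement for the `value` block inside xml_to_csv that looks fields up by tag name instead of by position is less fragile (a sketch assuming the standard labelImg/PASCAL VOC tags):

size = root.find('size')
bndbox = member.find('bndbox')
value = (
    root_file + "/" + style + "/" + root.find('filename').text,
    int(size.find('width').text),
    int(size.find('height').text),
    member.find('name').text,
    int(bndbox.find('xmin').text),
    int(bndbox.find('ymin').text),
    int(bndbox.find('xmax').text),
    int(bndbox.find('ymax').text),
)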
image              the 1035 images
display_rect.py    checks that an image and its box annotation line up
test_singer.csv    single-eye test annotations
train_singer.csv   single-eye training annotations
test_csv.csv       both-eyes test annotations
train_csv.csv      both-eyes training annotations
txt.txt            some frequently used commands
Note: the CSV files are already provided here, so no XML processing is needed; this folder contains no XML files.
# -*- coding=utf-8 -*-
import csv
import random

import cv2


def pic_box(file):
    """Pick one random row from the CSV and draw its bounding box on the image."""
    with open(file) as f:
        f_csv = csv.reader(f)
        headers = next(f_csv)  # skip the header row
        all_list = list(f_csv)
    row = random.choice(all_list)
    # The last four columns are xmin, ymin, xmax, ymax.
    xmin, ymin, xmax, ymax = int(row[-4]), int(row[-3]), int(row[-2]), int(row[-1])
    src = "image/" + row[0].split("\\")[-1]
    print(src)
    img = cv2.imread(src)
    cv2.putText(img, "frame:{0}".format(src.split("/")[-1]), (1400, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
    cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
    cv2.imshow("Frame", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


def main():
    pic_box("train_csv.csv")


main()
Note: each run randomly picks one image and displays it with its bounding-box information.
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

import os
import io
from collections import namedtuple

import pandas as pd
import tensorflow.compat.v2 as tf
from PIL import Image
from object_detection.utils import dataset_util

flags = tf.compat.v1.app.flags
flags.DEFINE_string('csv_input', default='/home/hanqing/SSD-Tensorflow-master/VOC2019/ImageSets/Main/csv/sj_train1.csv', help='Path to the input CSV')
flags.DEFINE_string('output_path', default='/home/hanqing/SSD-Tensorflow-master/tfrecords_/sj_train.record', help='Path to the output TFRecord')
flags.DEFINE_string('image_dir', default='/home/hanqing/SSD-Tensorflow-master/VOC2019/JPEGImages/sj_data/', help='Directory containing the images')
FLAGS = flags.FLAGS


def class_text_to_int(row_label):
    # NOTE: replace these labels with the class name(s) from your own
    # label_map.pbtxt (this project uses a single eye class); in the
    # TF Object Detection API, label 0 is reserved for the background.
    if row_label == "animation_person":
        return 1
    elif row_label == 'women':
        return 2
    else:
        return 0


def split(df, group):
    """Group the CSV rows by filename, one group per image."""
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    return [data(filename, gb.get_group(x))
            for filename, x in zip(gb.groups.keys(), gb.groups)]


def create_tf_example(group, path):
    """Build one tf.train.Example from all boxes belonging to one image."""
    with tf.compat.v1.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []

    for index, row in group.object.iterrows():
        # Box coordinates are stored normalized to [0, 1].
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(str(row['class']).encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example


def main(_):
    writer = tf.compat.v1.python_io.TFRecordWriter(FLAGS.output_path)
    path = os.path.join(FLAGS.image_dir)
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')
    for group in grouped:
        tf_example = create_tf_example(group, path)
        writer.write(tf_example.SerializeToString())
    writer.close()
    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))


if __name__ == '__main__':
    tf.compat.v1.app.run()
Open a cmd prompt in the project directory and run:
python gennrate_tfrecord_v2.py --csv_input=eye_label/train_singer.csv --output_path=singer/train.record --image_dir=eye_label/image
python gennrate_tfrecord_v2.py --csv_input=eye_label/test_singer.csv --output_path=singer/test.record --image_dir=eye_label/image
After the commands finish, train.record and test.record are generated in the singer directory.
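To sanity-check the output, you can count the examples in a record with a couple of lines of TF2 (a quick sketch):

import tensorflow as tf
n = sum(1 for _ in tf.data.TFRecordDataset('singer/train.record'))
print('train.record contains', n, 'examples')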
The pretrained model I used (you can substitute another):
http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz
If you run into problems, my dataset download includes a training.rar file.
Copy the following files into this directory:
eval                  directory for the final exported model, empty at first
model                 directory for training checkpoints, empty at first
video                 eye.mp4 and other test images
Image_Dectction.py    adapted from TF-image-od.py
Video_Dectction.py    adapted from TF-video-od.py
label_map.pbtxt       the object classes
pipeline.config       the training configuration file
For label_map.pbtxt, find a similar file in the object-detection directory and modify it, or write it by hand.
Its contents depend on your classes; there is only one class here, as in the sketch below.
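A minimal single-class label map (the class name 'eye' is my assumption; it must match the class column of your CSV files):

item {
  id: 1
  name: 'eye'
}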
For pipeline.config, copy it from where you extracted the pretrained model, or find the matching config in the object-detection directory; the key fields to edit are sketched below.
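A sketch of the fields that usually need editing (the paths assume this project's layout, and the checkpoint path depends on where you extracted the pretrained model):

num_classes: 1
batch_size: 4    # in train_config; reduce it if you run out of GPU memory
fine_tune_checkpoint: "training/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/checkpoint/ckpt-0"
fine_tune_checkpoint_type: "detection"
train_input_reader {
  label_map_path: "singer/label_map.pbtxt"
  tf_record_input_reader { input_path: "singer/train.record" }
}
eval_input_reader {
  label_map_path: "singer/label_map.pbtxt"
  tf_record_input_reader { input_path: "singer/test.record" }
}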
Find the needed scripts (model_main_tf2.py, exporter_main_v2.py) in the object_detection directory and copy them to the project directory; they are used in the two commands below.
python model_main_tf2.py --pipeline_config_path=singer/pipeline.config --model_dir=singer/model/ --logtostderr
python exporter_main_v2.py --input_type=image_tensor --pipeline_config_path=singer/pipeline.config --trained_checkpoint_dir=singer/model --output_directory=singer/eval
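Training can be monitored with TensorBoard if you have it installed:

tensorboard --logdir=singer/model

Exporting writes a saved_model directory under singer/eval, which the two detection scripts below load.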
# coding: utf-8
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'    # Suppress TensorFlow logging (1)
import tensorflow as tf
import cv2
import argparse

tf.get_logger().setLevel('ERROR')           # Suppress TensorFlow logging (2)

parser = argparse.ArgumentParser()
parser.add_argument('--model', help='Folder that the Saved Model is Located In',
                    default='eval')
parser.add_argument('--labels', help='Where the Labelmap is Located',
                    default='label_map.pbtxt')
parser.add_argument('--image', help='Name of the single image to perform detection on',
                    default='video/0072.png')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects',
                    default=0.50)
args = parser.parse_args()

# Enable GPU dynamic memory allocation
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

IMAGE_PATHS = args.image                  # path to the input image
PATH_TO_MODEL_DIR = args.model            # path to the exported model directory
PATH_TO_LABELS = args.labels              # path to the label map
MIN_CONF_THRESH = float(args.threshold)   # minimum confidence threshold

# LOAD THE MODEL
import time
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils

PATH_TO_SAVED_MODEL = PATH_TO_MODEL_DIR + "/saved_model"
print('Loading model...', end='')
start_time = time.time()
detect_fn = tf.saved_model.load(PATH_TO_SAVED_MODEL)
end_time = time.time()
print('Done! Took {} seconds'.format(end_time - start_time))

# LOAD LABEL MAP DATA FOR PLOTTING
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS,
                                                                    use_display_name=True)

import numpy as np
import warnings
warnings.filterwarnings('ignore')   # Suppress Matplotlib warnings

print('Running inference for {}... '.format(IMAGE_PATHS), end='')
image = cv2.imread(IMAGE_PATHS)
# OpenCV reads BGR; the model expects RGB input.
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# The input needs to be a tensor, and the model expects a batch of images,
# so add a batch axis with `tf.newaxis`.
input_tensor = tf.convert_to_tensor(image_rgb)[tf.newaxis, ...]
detections = detect_fn(input_tensor)

# All outputs are batch tensors. Convert to numpy arrays and take index [0]
# to remove the batch dimension; keep only the first num_detections.
num_detections = int(detections.pop('num_detections'))
detections = {key: value[0, :num_detections].numpy()
              for key, value in detections.items()}
detections['num_detections'] = num_detections
# detection_classes should be ints.
detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

image_with_detections = image.copy()
viz_utils.visualize_boxes_and_labels_on_image_array(
    image_with_detections,
    detections['detection_boxes'],
    detections['detection_classes'],
    detections['detection_scores'],
    category_index,
    use_normalized_coordinates=True,
    max_boxes_to_draw=100,
    min_score_thresh=MIN_CONF_THRESH,
    agnostic_mode=False)
print('\n' + 'Done')

# SAVE AND DISPLAY THE OUTPUT IMAGE (the pic/ directory must exist)
cv2.imwrite("pic/" + IMAGE_PATHS.split("/")[-1], image_with_detections)
cv2.imshow('Object Detector', image_with_detections)
cv2.waitKey(0)      # closes the window once a key is pressed
cv2.destroyAllWindows()
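A typical invocation, assuming the script is run from inside the singer directory (all arguments are optional; the defaults are shown in the argparse section above, and the pic/ output directory must exist):

python Image_Dectction.py --model=eval --labels=label_map.pbtxt --image=video/0072.png --threshold=0.5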
# coding: utf-8
"""
Object Detection (On Video) From TF2 Saved Model
=====================================
"""
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'    # Suppress TensorFlow logging (1)
import tensorflow as tf
import cv2
import argparse

tf.get_logger().setLevel('ERROR')           # Suppress TensorFlow logging (2)

parser = argparse.ArgumentParser()
parser.add_argument('--model', help='Folder that the Saved Model is Located In',
                    default='eval')
parser.add_argument('--labels', help='Where the Labelmap is Located',
                    default='label_map.pbtxt')
parser.add_argument('--video', help='Name of the video to perform detection on',
                    default='video/eye.mp4')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects',
                    default=0.75)
args = parser.parse_args()

# Enable GPU dynamic memory allocation
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

VIDEO_PATHS = args.video                  # path to the input video
PATH_TO_MODEL_DIR = args.model            # path to the exported model directory
PATH_TO_LABELS = args.labels              # path to the label map
MIN_CONF_THRESH = float(args.threshold)   # minimum confidence threshold

# Load the model
import time
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils

PATH_TO_SAVED_MODEL = PATH_TO_MODEL_DIR + "/saved_model"
print('Loading model...', end='')
start_time = time.time()
detect_fn = tf.saved_model.load(PATH_TO_SAVED_MODEL)
end_time = time.time()
print('Done! Took {} seconds'.format(end_time - start_time))

# Load label map data (for plotting)
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS,
                                                                    use_display_name=True)

import numpy as np
import warnings
warnings.filterwarnings('ignore')   # Suppress Matplotlib warnings

print('Running inference for {}... '.format(VIDEO_PATHS), end='')
video = cv2.VideoCapture(VIDEO_PATHS)
while video.isOpened():
    ret, frame = video.read()
    if not ret:     # stop at the end of the video
        break
    # OpenCV reads BGR; the model expects RGB input.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # The model expects a batch of images, so add a batch axis with `tf.newaxis`.
    input_tensor = tf.convert_to_tensor(frame_rgb)[tf.newaxis, ...]
    detections = detect_fn(input_tensor)

    # All outputs are batch tensors. Convert to numpy arrays and take index [0]
    # to remove the batch dimension; keep only the first num_detections.
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections
    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

    frame_with_detections = frame.copy()
    viz_utils.visualize_boxes_and_labels_on_image_array(
        frame_with_detections,
        detections['detection_boxes'],
        detections['detection_classes'],
        detections['detection_scores'],
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=200,
        min_score_thresh=MIN_CONF_THRESH,
        agnostic_mode=False)

    cv2.imshow('Object Detector', frame_with_detections)
    if cv2.waitKey(1) == ord('q'):  # press q to quit
        break

video.release()
cv2.destroyAllWindows()
print("Done")
Demo video: https://www.bilibili.com/video/BV1wQ4y1d7Qm/
Here left and right eyes are detected as a single class. You could try splitting them into two classes: the dataset files are provided and the classes are already separated, so give it a try.
My results: