To unzip the pretrained checkpoint:
unzip ssd_300_vgg.ckpt.zip
# -*- coding: UTF-8 -*-
# Purpose: randomly split the dataset into training, test and validation
# sets according to the given ratios:
#   - read the .xml annotation file names into xml_list
#   - randomly sample xml_list according to the ratios
#   - write each split into its corresponding .txt file
import os, random
import copy  # use copy.deepcopy to copy lists

# Path of the xml annotations, and output path for the generated txt files
# (raw strings so the backslashes are not treated as escape sequences)
xml_path = r'E:\Research\ShellDataset_Plus\ShellDataset\Annotations'
txt_path = r'E:\Research\ShellDataset_Plus\ShellDataset\JPEGImages'
test_percent = 0.1  # fraction of the data used for testing
val_percent = 0.1   # fraction of the data used for validation

xml_list = os.listdir(xml_path)
# strip the .xml extension (os.path.splitext is safer than split('.')
# in case a file name contains extra dots)
xml_list = [os.path.splitext(xml)[0] for xml in xml_list]
xml_len = len(xml_list)

# allocate randomly
test = random.sample(xml_list, int(xml_len * test_percent))
trainval = copy.deepcopy(xml_list)
for xml in test:
    trainval.remove(xml)
val = random.sample(trainval, int(len(trainval) * val_percent))
train = copy.deepcopy(trainval)
for xml in val:
    train.remove(xml)

# (re)create the txt files and write one file name per line
trainval_path = open(os.path.join(txt_path, 'trainval.txt'), 'w+')
train_path = open(os.path.join(txt_path, 'train.txt'), 'w+')
val_path = open(os.path.join(txt_path, 'val.txt'), 'w+')
test_path = open(os.path.join(txt_path, 'test.txt'), 'w+')
for trainval_xml in trainval:
    trainval_path.write(trainval_xml + '\n')
for train_xml in train:
    train_path.write(train_xml + '\n')
for val_xml in val:
    val_path.write(val_xml + '\n')
for test_xml in test:
    test_path.write(test_xml + '\n')
trainval_path.close()
train_path.close()
val_path.close()
test_path.close()
After it runs, the train.txt, trainval.txt, val.txt and test.txt files are generated automatically.
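If you want to double-check the split, a minimal sketch like the following (assuming the same txt_path as above) prints the number of entries in each file; train + val + test should add up to the total number of annotations:
import os

txt_path = r'E:\Research\ShellDataset_Plus\ShellDataset\JPEGImages'
for name in ('trainval', 'train', 'val', 'test'):
    with open(os.path.join(txt_path, name + '.txt')) as f:
        print(name, sum(1 for _ in f))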
2.3.1. Modify pascalvoc_common.py
# Each key is a label from your own dataset; list exactly as many as you have (3 shown here).
# The second string is the broad category; all entries may share the same value and can be named freely.
VOC_LABELS = {
    'none': (0, 'Background'),
    'class1': (1, 'CLASS1'),
    'class2': (2, 'CLASS2'),
    'class3': (3, 'CLASS3'),
}
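For context, the converter uses this dict to turn the <name> tag of each XML object into the integer label stored in the tfrecord, roughly like this (a sketch, not the repo's exact code):
# assumes the VOC_LABELS dict defined above
label_name = 'class2'                  # text of an <object><name> tag
label_int = VOC_LABELS[label_name][0]  # -> 2, the id written to the tfrecord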
2.3.2. Modify pascalvoc_to_tfrecords.py
# Line 67: controls how many images go into each tfrecords shard; 100 means one .tfrecords file is generated per 100 images
SAMPLES_PER_FILES = 100
# Then, on line 83, change the file read mode to 'rb' as shown below
# (under Python 3 the JPEG data must be read as binary bytes, not decoded as text):
image_data = tf.gfile.FastGFile(filename, 'rb').read()
2.3.3. Create tf_convert_data.sh
DATASET_DIR=./VOC2007/
OUTPUT_DIR=./tfrecords
python tf_convert_data.py \
--dataset_name=pascalvoc \
--dataset_dir=${DATASET_DIR} \
--output_name=voc_2007_train \
--output_dir=${OUTPUT_DIR}
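Note that --output_name should stay in the voc_2007_<split> form: the dataset reader in datasets/pascalvoc_2007.py locates the generated shards by that file-name pattern.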
2.3.4. Run tf_convert_data.sh
First, give tf_convert_data.sh execute permission and normalize its line endings.
Open the file with vim and run:
set ff=unix
Save and quit (ESC, then :wq).
Then, in the terminal, execute:
chmod +x tf_convert_data.sh
sed -i 's/\r$//g' tf_convert_data.sh
./tf_convert_data.sh
Sample output:
ligy@lab729:~/SSD-Tensorflow-master$ ./tf_convert_data.sh
Dataset directory: ./VOC2007/
Output directory: ./tfrecords
>> Converting image 10080/10080
Finished converting the Pascal VOC dataset!
Open the tfrecords folder and you will see that the training data has been converted to .tfrecords format.
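As a quick sanity check you can count the records in one shard (a TF 1.x sketch; the shard file name below is an example, adjust it to what was actually generated):
import tensorflow as tf

# Count the examples stored in one shard; the result should not
# exceed SAMPLES_PER_FILES (100 in this setup).
shard = './tfrecords/voc_2007_train_000.tfrecord'  # example name
count = sum(1 for _ in tf.python_io.tf_record_iterator(shard))
print(count)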
3.1.1. Modify train_ssd_network.py
# Line 63: how often (in seconds) the model is saved; 600 = one checkpoint every 10 minutes
tf.app.flags.DEFINE_integer(
    'save_interval_secs', 600,
    'The frequency with which the model is saved, in seconds.')
# Line 154: set the maximum number of training steps, e.g. change None to 40000.
# (When this value is None, tf.contrib.slim's training loop runs indefinitely.)
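After the edit, the flag definition would look roughly like this (assuming the flag is the repo's max_number_of_steps):
tf.app.flags.DEFINE_integer(
    'max_number_of_steps', 40000,
    'The maximum number of training steps.')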
3.2.2. Modify nets/ssd_vgg_300.py
default_params = SSDParams(
    img_shape=(300, 300),
    num_classes=21,          # set to your class count + 1; the +1 is the background label
    no_annotation_label=21,  # likewise, class count + 1
    feat_layers=['block4', 'block7', 'block8', 'block9', 'block10', 'block11'],
    feat_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
    anchor_size_bounds=[0.15, 0.90],
    # anchor_size_bounds=[0.20, 0.90],
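For intuition, anchor_size_bounds=[s_min, s_max] spreads the anchor scales evenly over the six feature layers, following the SSD paper's formula s_k = s_min + (s_max - s_min)·(k-1)/(m-1); a sketch (the repo's own computation may round differently):
# Per-layer anchor scales implied by anchor_size_bounds=[0.15, 0.90]
s_min, s_max = 0.15, 0.90
m = 6  # number of feature layers (block4 ... block11)
scales = [s_min + (s_max - s_min) * k / (m - 1) for k in range(m)]
print(['%.2f' % s for s in scales])  # ['0.15', '0.30', '0.45', '0.60', '0.75', '0.90']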
3.2.3. Modify train_ssd_network.py
# Set to your class count + 1 (background included)
tf.app.flags.DEFINE_integer(
    'num_classes', 21, 'Number of classes to use in the dataset.')
3.2.4. Modify eval_ssd_network.py
# =========================================================================== #
# Main evaluation flags.
# =========================================================================== #
# Again class count + 1; 9 here corresponds to this tutorial's 8-class dataset
tf.app.flags.DEFINE_integer(
    'num_classes', 9, 'Number of classes to use in the dataset.')
3.2.5. Modify datasets/pascalvoc_2007.py
TRAIN_STATISTICS = {
    'none': (0, 0),
    'car': (192, 234),      # 192 images contain this class, with 234 bounding boxes in total
    'person': (1554, 2862),
    'dog': (558, 1230),
    ...
    'total': (10080, 18798),  # 10080 training images, 18798 bounding boxes overall
}
TEST_STATISTICS = {
    'none': (0, 0),
    'car': (1, 1),
    'person': (1, 1),
    'dog': (1, 1),
    ...
    'total': (20, 20),
}
SPLITS_TO_SIZES = {
    'train': 8165,  # number of training images
    'test': 1008,   # number of test images
}
SPLITS_TO_STATISTICS = {
    'train': TRAIN_STATISTICS,
    'test': TEST_STATISTICS,
}
NUM_CLASSES = 20  # number of classes in the dataset; do NOT add the background class here
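If you do not know these numbers for your own dataset, a short script like the following (a sketch assuming standard VOC-style XML files under Annotations) computes the (image_count, bbox_count) pairs:
import os
import collections
import xml.etree.ElementTree as ET

ann_dir = r'E:\Research\ShellDataset_Plus\ShellDataset\Annotations'  # your Annotations folder
images = collections.Counter()  # number of images containing each class
boxes = collections.Counter()   # number of bounding boxes per class
files = [f for f in os.listdir(ann_dir) if f.endswith('.xml')]
for fname in files:
    root = ET.parse(os.path.join(ann_dir, fname)).getroot()
    names = [obj.find('name').text for obj in root.findall('object')]
    boxes.update(names)        # one count per object
    images.update(set(names))  # one count per image that has the class
for cls in sorted(boxes):
    print("'%s': (%d, %d)," % (cls, images[cls], boxes[cls]))
print("'total': (%d, %d)," % (len(files), sum(boxes.values())))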
Create the training script train.sh in the SSD root directory:
DATASET_DIR=./tfrecords
TRAIN_DIR=./log/
CHECKPOINT_PATH=./checkpoints/ssd_300_vgg.ckpt
python train_ssd_network.py \
--train_dir=${TRAIN_DIR} \
--dataset_dir=${DATASET_DIR} \
--dataset_name=pascalvoc_2007 \
--dataset_split_name=train \
--model_name=ssd_300_vgg \
--checkpoint_path=${CHECKPOINT_PATH} \
--checkpoint_exclude_scopes=ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box \
--save_summaries_secs=60 \
--save_interval_secs=600 \
--weight_decay=0.0005 \
--optimizer=adam \
--learning_rate=0.001 \
--batch_size=8 \
--ignore_missing_vars=True
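The --checkpoint_exclude_scopes list keeps the pretrained VGG backbone but re-initializes the layers whose shapes depend on the number of classes, and --ignore_missing_vars=True allows the restore to proceed even when some variables are missing from the checkpoint. Note also that flags must not have spaces around the = sign, or the shell will split them into separate arguments.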
Give train.sh execute permission and normalize its line endings.
Open the file with vim and run:
set ff=unix
Save and quit (ESC, then :wq).
Then, in the terminal, execute:
chmod +x train.sh
sed -i 's/\r$//g' train.sh
./train.sh
Training time depends on the number of training steps, your GPU, and so on.
Convert the test dataset to .tfrecords format as well.
Create a test folder inside VOC2007 to hold the data to be evaluated, then run:
DATASET_DIR=./VOC2007/
OUTPUT_DIR=./tfrecords/test
python tf_convert_data.py \
--dataset_name=pascalvoc \
--dataset_dir=${DATASET_DIR} \
--output_name=voc_2007_test \
--output_dir=${OUTPUT_DIR}
Next, write the evaluation script (e.g. eval.sh) in the SSD root directory:
DATASET_DIR=./tfrecords/test/           # the test data converted to tfrecords above
EVAL_DIR=./log_eval/                    # directory where the results are saved
CHECKPOINT_PATH=./log/model.ckpt-40000  # replace with your own trained model
python3 ./eval_ssd_network.py \
--eval_dir=${EVAL_DIR} \
--dataset_dir=${DATASET_DIR} \
--dataset_name=pascalvoc_2007 \
--dataset_split_name=test \
--model_name=ssd_300_vgg \
--checkpoint_path=${CHECKPOINT_PATH} \
--batch_size=1
Sample output:
...
INFO:tensorflow:Evaluation [100/1008]
INFO:tensorflow:Evaluation [200/1008]
INFO:tensorflow:Evaluation [300/1008]
INFO:tensorflow:Evaluation [400/1008]
INFO:tensorflow:Evaluation [500/1008]
INFO:tensorflow:Evaluation [600/1008]
INFO:tensorflow:Evaluation [700/1008]
INFO:tensorflow:Evaluation [800/1008]
INFO:tensorflow:Evaluation [900/1008]
INFO:tensorflow:Evaluation [1000/1008]
INFO:tensorflow:Evaluation [1008/1008]
2019-03-29 10:40:43.982762: W ./tensorflow/core/grappler/optimizers/graph_optimizer_stage.h:230] Failed to run optimizer ArithmeticOptimizer, stage HoistCommonFactor. Error: Node average_precision_voc07/ArithmeticOptimizer/HoistCommonFactor_Add_AddN is missing output properties at position :0 (num_outputs=0)
AP_VOC07/mAP[0.80815487204921244]
AP_VOC12/mAP[0.84965517094243659]
INFO:tensorflow:Finished evaluation at 2019-03-29-02:40:44
Time spent : 53.065 seconds.
Time spent per BATCH: 0.053 seconds.
The evaluation completes, printing the VOC07 and VOC12 mAP values shown above.
4.2.1. Create and modify ssd_notebook.py
import os
import math
import random
import numpy as np
import tensorflow as tf
import cv2
slim = tf.contrib.slim
#matplotlib inline
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import sys
sys.path.append('../')
from nets import ssd_vgg_300, ssd_common, np_methods
from preprocessing import ssd_vgg_preprocessing
from notebooks import visualization
# TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession(config=config)
# Input placeholder.
net_shape = (300, 300)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)
# Define the SSD model.
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)
# Restore SSD model.
ckpt_filename = '../log/model.ckpt-40000'  # change to your own trained model
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)
# SSD default anchor boxes.
ssd_anchors = ssd_net.anchors(net_shape)
# Main image processing routine.
def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    # Run SSD network.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img], feed_dict={img_input: img})
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=9,  # match your class count + 1
        decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
# Test on the demo images and save the visualized outputs.
# (The original loop used range(19) with image_names[-i+1], which mis-indexes
# the list; iterating over the sorted names covers every image exactly once.)
path = '../demo/'  # directory holding the images to detect
image_names = sorted(os.listdir(path))
for i, name in enumerate(image_names):  # 19 images in my demo folder, all detected in one run
    img = mpimg.imread(os.path.join(path, name))
    rclasses, rscores, rbboxes = process_image(img)
    # visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)
    visualization.plt_bboxes(i, img, rclasses, rscores, rbboxes)
4.2.2. Modify visualization.py
====================================================================================
# Line 45: add an img parameter to the function definition
def draw_lines(img, lines, color=[255, 0, 0], thickness=2):
====================================================================================
# Change the last line from
plt.show()
# to
plt.savefig("%d.jpg" % (nm + 1))
====================================================================================
Since ssd_notebook.py now calls visualization.plt_bboxes(i, img, ...), also add the image index (named nm here) as the first parameter of plt_bboxes, so each detection result is saved as a numbered .jpg instead of being displayed.
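For reference, here is a minimal self-contained sketch of what the modified plt_bboxes could look like; the repo's real function is more elaborate (per-class colors, thicker styling), and nm is the hypothetical name of the new index parameter:
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def plt_bboxes(nm, img, classes, scores, bboxes, figsize=(10, 10)):
    """Draw detection boxes on img and save the figure as '<nm+1>.jpg'."""
    fig = plt.figure(figsize=figsize)
    plt.imshow(img)
    height, width = img.shape[0], img.shape[1]
    for i in range(classes.shape[0]):
        # bboxes holds relative [ymin, xmin, ymax, xmax] coordinates
        ymin, xmin, ymax, xmax = bboxes[i]
        rect = patches.Rectangle((xmin * width, ymin * height),
                                 (xmax - xmin) * width,
                                 (ymax - ymin) * height,
                                 fill=False, edgecolor='red', linewidth=1.5)
        plt.gca().add_patch(rect)
        plt.gca().text(xmin * width, ymin * height - 2,
                       '%d: %.2f' % (classes[i], scores[i]), color='red')
    plt.savefig('%d.jpg' % (nm + 1))  # save instead of plt.show()
    plt.close(fig)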
Run ssd_notebook.py and the detection results will appear in the notebooks folder.