分以下几步,这几天边做边慢慢补充:
SSD: Single Shot Detector 用于自然场景文字检测
该文章使用的是ICDAR2013格式的标记数据,这里修改其第一部分的Python代码,使之能够转换我的Yolo格式的标记数据。代码如下:
import os
import shutil
import numpy as np
import sys
import cv2
from itertools import islice
from xml.dom.minidom import Document
def insertObject(doc, datas):
    """Build an ``<object>`` element describing one bounding box.

    Args:
        doc: The ``xml.dom.minidom`` document used to create the nodes.
        datas: Sequence indexed as ``[xmin, ymin, xmax, ymax, name]``. Every
            value is converted with ``str()`` and has surrounding spaces
            removed before being written.

    Returns:
        The new ``<object>`` element. It is not attached to any parent.
    """
    def text_element(tag, value):
        # One <tag>value</tag> node with the spaces trimmed from the text.
        element = doc.createElement(tag)
        element.appendChild(doc.createTextNode(str(value).strip(' ')))
        return element

    obj = doc.createElement('object')
    obj.appendChild(text_element('name', datas[4]))

    bndbox = doc.createElement('bndbox')
    for index, tag in enumerate(('xmin', 'ymin', 'xmax', 'ymax')):
        bndbox.appendChild(text_element(tag, datas[index]))
    obj.appendChild(bndbox)
    return obj
def yolo2voc_datas(yolodatas, height, width):
    """Convert one YOLO label record into VOC-style corner coordinates.

    Args:
        yolodatas: Sequence ``[class_id, center_x, center_y, box_w, box_h]``.
            The four geometry values (strings or numbers) are multiplied by
            the image size, i.e. they are treated as fractions of it.
            ``class_id`` is ignored.
        height: Image height in pixels.
        width: Image width in pixels.

    Returns:
        A new list whose first five items are
        ``[xmin, ymin, xmax, ymax, 'my_obj']`` with the coordinates as
        integer strings. Items beyond index 4 are carried over unchanged so
        that callers can still detect over-long records by their length.

    Raises:
        IndexError: If ``yolodatas`` has fewer than five items.
        ValueError: If a geometry item cannot be parsed as a float.
    """
    # Work on a copy: the caller's record must not be overwritten in place.
    vocdatas = list(yolodatas)

    yolo_center_x = round(float(str(yolodatas[1]).strip(' ')) * width)
    yolo_center_y = round(float(str(yolodatas[2]).strip(' ')) * height)
    yolo_obj_width = round(float(str(yolodatas[3]).strip(' ')) * width)
    yolo_obj_height = round(float(str(yolodatas[4]).strip(' ')) * height)

    # Centre/size -> corners; int() truncates the half-pixel that odd sizes produce.
    vocdatas[0] = str(int(yolo_center_x - yolo_obj_width / 2))
    vocdatas[1] = str(int(yolo_center_y - yolo_obj_height / 2))
    vocdatas[2] = str(int(yolo_center_x + yolo_obj_width / 2))
    vocdatas[3] = str(int(yolo_center_y + yolo_obj_height / 2))
    # Every box is written with this single fixed class name.
    vocdatas[4] = 'my_obj'
    return vocdatas
def _new_annotation_doc(img_name, imgSize):
    """Create a VOC XML document holding the folder/filename/size header."""
    doc = Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    def add_text(parent, tag, text):
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(text))
        parent.appendChild(node)

    # dataName is the module-level dataset name set by the script entry point.
    add_text(annotation, 'folder', dataName)
    add_text(annotation, 'filename', img_name + '.jpg')

    # imgSize is the image's shape tuple: (height, width, depth).
    size = doc.createElement('size')
    add_text(size, 'width', str(imgSize[1]))
    add_text(size, 'height', str(imgSize[0]))
    add_text(size, 'depth', str(imgSize[2]))
    annotation.appendChild(size)
    return doc, annotation


def txt_to_xml(labels_path, img_path, img_name_list_txt, xmlpath_path, bb_split, name_size):
    """Convert YOLO label files into VOC XML annotations, one per image.

    For every image name listed in ``img_name_list_txt`` the image is read to
    get its size, the size is appended to the ``name_size`` file, and the
    matching YOLO label file is converted into ``<name>.xml``.

    Args:
        labels_path: Directory prefix (with trailing separator) of the YOLO
            ``.txt`` label files.
        img_path: Directory prefix (with trailing separator) of the ``.jpg``
            images.
        img_name_list_txt: Text file with one image name (no extension) per
            line.
        xmlpath_path: Directory prefix (with trailing separator) where the
            XML files are written.
        bb_split: Separator between the fields of one label line.
        name_size: File name, created under the module-level ``dataName``
            directory, that receives ``name height width`` lines.

    Raises:
        IOError: If an image cannot be read.
        SystemExit: If a label line does not have exactly five fields.
    """
    # np.loadtxt returns a 0-d array when the list has a single line;
    # atleast_1d keeps the result iterable in that case.
    img_name_list = np.atleast_1d(np.loadtxt(img_name_list_txt, dtype=str))

    with open(dataName + '/' + name_size, 'w') as name_size_file:
        # For every file name listed in /ImageSets/Main/<type>.txt
        for img_name in img_name_list:
            img_name = str(img_name)
            print(img_name)
            imageFile = img_path + img_name + '.jpg'

            # cv2.imread reports an unreadable file by returning None.
            img = cv2.imread(imageFile)
            if img is None:
                raise IOError('cannot read image: ' + imageFile)

            # Record the image height and width in the name_size file.
            imgSize = img.shape
            jpgheight = imgSize[0]
            jpgwidth = imgSize[1]
            name_size_file.write(img_name + ' ' + str(jpgheight) + ' ' + str(jpgwidth) + '\n')
            print("Img size was written in name_size.txt file")

            doc = None
            annotation = None
            flag = 0
            # Read the YOLO label file that belongs to this image.
            with open(labels_path + img_name + '.txt', 'r') as fidin:
                for data in fidin:
                    # Trailing whitespace and blank lines are common in label
                    # files and must not be treated as broken records.
                    data = data.strip()
                    if not data:
                        continue
                    flag = flag + 1
                    datas = data.split(bb_split)
                    print("Flag: " + str(flag))
                    print("The original yolo slice data is:")
                    print(datas)
                    # Validate before converting so that a short record gives
                    # this message instead of an IndexError.
                    if 5 != len(datas):
                        print(img_name + ':bounding box information error')
                        sys.exit(-1)
                    # Change this one piece of datas from Yolo format to VOC format
                    datas = yolo2voc_datas(datas, jpgheight, jpgwidth)
                    print("The voc slice data is:")
                    print(datas)
                    print("One slice done!")
                    if doc is None:
                        # First box of this image: create the XML header.
                        doc, annotation = _new_annotation_doc(img_name, imgSize)
                    annotation.appendChild(insertObject(doc, datas))

            if doc is None:
                # No boxes: as before, no XML file is produced for this image.
                print(img_name + ':no bounding boxes found, XML not written')
                continue

            with open(xmlpath_path + img_name + '.xml', 'w') as f:
                f.write(doc.toprettyxml(indent=' '))
def create_list(dataName, img_list_txt, img_path, img_name_list_txt, type, debug):
    """Write the image-name list and the image/annotation pair list.

    Args:
        dataName: Dataset directory; also the prefix of the paths written to
            the pair list.
        img_list_txt: File name of the pair list, created under ``dataName``.
        img_path: Directory whose entries are listed.
        img_name_list_txt: Path of the file that receives one image name
            (entry name minus its last four characters) per line.
        type: Subset name (e.g. ``'train'``) used inside the written paths.
        debug: When equal to 1 nothing is written.

    Returns:
        None.
    """
    if debug == 1:
        return

    # Both files are closed on exit, including when a write fails.
    with open(img_name_list_txt, 'w') as f, \
            open(dataName + '/' + img_list_txt, 'w') as fAll:
        # os.listdir order is arbitrary; sort so the lists are reproducible.
        for name in sorted(os.listdir(img_path)):
            # Drop the last four characters, i.e. a three-letter extension.
            stem = name[0:-4]
            f.write(stem + '\n')
            fAll.write(dataName + '/' + 'JPEGImages' + '/' + type + '/' + stem + '.jpg' + ' ')
            fAll.write(dataName + '/' + 'Annotations' + '/' + type + '/' + stem + '.xml' + '\n')
    print("Create the name list txt file done!")
def my_copytree(src, dst):
    """Copy every entry of directory ``src`` into directory ``dst``.

    Each entry is copied with ``shutil.copy2``. This is a flat copy and does
    not descend into sub-directories.

    Args:
        src: Directory whose entries are copied.
        dst: Destination directory.

    Raises:
        OSError: Re-raised from ``shutil.copy2`` after the traceback has been
            printed; copying stops at the first failure.
    """
    # Local import keeps this function self-contained.
    import traceback

    for name in os.listdir(src):
        srcname = os.path.join(src, name)
        try:
            shutil.copy2(srcname, dst)
        except OSError:
            # Print the real copy error, then let it propagate unchanged.
            traceback.print_exc()
            raise
def create_vocfolders(dataName, type, debug):
    """Create the VOC folder skeleton and optionally copy the YOLO data in.

    The source data is expected under ``./yoloorg/yoloimg`` and
    ``./yoloorg/yolotxt`` in the current directory.

    Args:
        dataName: Name of the dataset directory created in the current
            directory.
        type: Subset (``'train'`` or ``'test'``) that receives the copies.
        debug: When equal to 1 the images and label files are not copied.

    Returns:
        0 if the YOLO source folders are missing (a usage hint is printed and
        nothing is created), otherwise 1.
    """
    yolo_dir = './' + 'yoloorg'
    yoloimg_dir = yolo_dir + '/yoloimg'
    yolotxt_dir = yolo_dir + '/yolotxt'

    if not all(os.path.isdir(d) for d in (yolo_dir, yoloimg_dir, yolotxt_dir)):
        usage = (
            "The yolo org data folder NOT exist, please check first!",
            "Structure should be: (in current folder)",
            "Current folder",
            " |",
            " +--yoloorg",
            " |",
            " |--yoloimg",
            " | |",
            " | +all the jpg files here in 1200x900",
            " |",
            " +--yolotxt",
            " |",
            " +all the yolo marked txt files here",
        )
        for line in usage:
            print(line)
        return 0

    root = './' + dataName

    def ensure_dirs(*suffixes):
        # Create each missing directory; parents are listed before children.
        for suffix in suffixes:
            path = root + suffix
            if not os.path.isdir(path):
                os.mkdir(path)

    ensure_dirs(
        '',
        '/Annotations', '/Annotations/train', '/Annotations/test',
        '/ImageSets', '/ImageSets/Main',
    )

    # Empty list files, created only when they do not exist yet.
    for list_name in ('/train.txt', '/test.txt'):
        list_file = root + '/ImageSets' + '/Main' + list_name
        if not os.path.exists(list_file):
            open(list_file, 'w').close()

    ensure_dirs(
        '/JPEGImages', '/JPEGImages/train', '/JPEGImages/test',
        '/label', '/label/train', '/label/test',
    )

    if debug != 1:
        # Images go to JPEGImages/<type>, YOLO label files to label/<type>.
        my_copytree(yoloimg_dir, root + '/JPEGImages' + '/' + type)
        my_copytree(yolotxt_dir, root + '/label' + '/' + type)
    return 1
if __name__ == '__main__':
    # Set debug to 1 while debugging: the jpg and txt files are then NOT
    # copied and the list files are not rewritten. Use 0 for a normal run.
    debug = 0
    jpgwidth = 1200
    jpgheight = 900
    dataName = 'YOLOMARK'  # dataset name; txt_to_xml reads it as a global
    type = 'train'  # subset to build
    bb_split = ' '  # field separator of the YOLO label lines

    create_result = create_vocfolders(dataName, type, debug)
    if create_result == 1:
        print("Create folders and copy img/txt done!")

        # Directory prefixes; each one ends with '/'.
        img_path = '{0}/JPEGImages/{1}/'.format(dataName, type)
        labels_path = '{0}/label/{1}/'.format(dataName, type)
        xml_path = '{0}/Annotations/{1}/'.format(dataName, type)

        # List of image names, the image/annotation pair list, and the
        # name/size list.
        img_name_list_txt = '{0}/ImageSets/Main/{1}.txt'.format(dataName, type)
        img_list_txt = '{0}.txt'.format(type)
        name_size = '{0}_name_size.txt'.format(type)

        create_list(dataName, img_list_txt, img_path, img_name_list_txt, type, debug)
        txt_to_xml(labels_path, img_path, img_name_list_txt, xml_path, bb_split, name_size)
由此得到了VOC格式的数据。
2、VOC数据转LMDB数据
按照 caffe SSD目标检测lmdb数据格式制作 中第四步,制作LMDB数据
其中,create_data.sh略作修改如下:
# Build the LMDB for each listed subset of a VOC-format dataset by calling
# create_annoset.py.
cur_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# Set to 1 to pass --redo to create_annoset.py; any other value leaves it out.
redo=1
# Folder that holds the VOC-format data.
data_root_dir="$cur_dir/mydataset"
# Subset label (train or test). It only selects a sub-folder, so either value
# works; it exists to make switching between folders of the same dataset easy.
type=train
# Dataset name: selects the folder under mydataset that holds the VOC data
# and the folder under result that receives the output.
dataset_name="YOLOMARK"
mapfile="$cur_dir/result/$dataset_name/labelmap_$dataset_name.prototxt"
anno_type="detection"
db="lmdb"
min_dim=0
max_dim=0
width=0
height=0
extra_cmd="--encode-type=jpg --encoded"
# Compare the value explicitly: a bare [ $redo ] is true for any non-empty
# string, including 0.
if [ "$redo" = "1" ]
then
  extra_cmd="$extra_cmd --redo"
fi
# $type and $extra_cmd are left unquoted on purpose so that they split into
# separate words.
for subset in $type
do
  # The last argument is where the symlink would be placed. The folder does
  # not have to exist; the argument is kept only to limit the code changes,
  # because the line that creates the link in create_annoset.py has been
  # commented out.
  python3 create_annoset.py --anno-type="$anno_type" --label-map-file="$mapfile" --min-dim="$min_dim" --max-dim="$max_dim" --resize-width="$width" --resize-height="$height" --check-label $extra_cmd "$data_root_dir" "result/$dataset_name/$subset.txt" "result/$dataset_name/${dataset_name}_${subset}_${db}" "result/$dataset_name"
done
由此得到LMDB数据。
3、建立Ubuntu16.04下的Caffe训练环境(带GPU,用于训练模型)
参考以下两篇文章:
ubuntu16.04 opencv3.3 python2.7 caffe GPU环境搭建
深度学习环境配置Ubuntu16.04+CUDA8.0+CuDNN+Anaconda2+openCV2.4.9+caffe(全离线GPU版)
- 安装Ubuntu16.04
- 更新源为阿里源,选择显卡(1080ti)的驱动为384.11
- 检查gcc,g++版本为5.4
- /usr/bin中的python重新做链接到python3.5。默认用python3.5
- 安装CUDA 8.0版(使用ubuntu16.04的run文件),安装CUDNN6.0版
- 安装cmake
- 安装opencv3.3.0和opencv-contrib3.3.0。opencv3.3.0的cmake编译命令为:
# Configure the OpenCV 3.3.0 build (with opencv_contrib modules and CUDA);
# run it from a build directory directly below the OpenCV source tree.
cmake -D CMAKE_BUILD_TYPE=RELEASE \
-D CMAKE_INSTALL_PREFIX=/usr/local \
-D INSTALL_C_EXAMPLES=ON \
-D INSTALL_PYTHON_EXAMPLES=ON \
-D BUILD_EXAMPLES=ON \
-D OPENCV_EXTRA_MODULES_PATH=/lance/opencv_contrib-3.3.0/modules \
-D PYTHON_EXECUTABLE=/usr/bin/python \
-D BUILD_NEW_PYTHON_SUPPORT=ON \
-D WITH_TBB=ON \
-D WITH_V4L=ON \
-D WITH_OPENGL=ON \
-D WITH_CUDA=ON \
-D WITH_GTK=ON \
-D WITH_CUBLAS=1 \
-D ENABLE_FAST_MATH=1 \
-D CUDA_FAST_MATH=1 \
-D CUDA_NVCC_FLAGS="-D_FORCE_INLINES" \
-D BUILD_TIFF=ON ..
- 安装libopenblas-dev,liblapack-dev,OpenBLAS
- 安装caffe-ssd。在.bashrc中加入
# Put the caffe-ssd Python directory first on the module search path
# (presumably so that "import caffe" finds this build).
export PYTHONPATH=/lance/caffe-ssd/python:$PYTHONPATH
- 安装pip3,安装opencv_python,安装python-numpy,python3-numpy,python-skimage,protobuf
试一下,在python下是否能正确import cv2和caffe
- 安装spyder3
由此得到可以用来训练caffe模型的环境。
6、建立RaspberryPi+ncsdk的NCS运行环境,观察树莓派下的效果(不带GPU,只有NCS,用于运行模型)