姓名:王咫毅
学号:19021211150
【嵌牛导读】在进行机器学习的时候,经常遇到需要用自己的数据集进行训练的情况,如何定义自己的数据集来进行训练?本文便是一个例子,使用自定义的VOC2007格式数据集进行目标检测。
【嵌牛鼻子】目标检测 voc2007
【嵌牛提问】如何定义自己的数据集进行访问?
【嵌牛正文】
转载自:https://blog.csdn.net/tingxueyou/article/details/59110760
在目标检测时往往需要使用自己的图片集来测试性能,所以本文介绍如何实现对自己数据集的训练和验证过程,内容包括:
1 数据集的标注
2 数据集的转换
1 数据集的标注
数据的标注使用BBox-Label-Tool工具,该工具使用python实现,使用简单方便。修改后的工具支持多label的标签标注。
该工具生成的标签格式是:
object_number编号
className类别名 x1min y1min x1max y1max
classname类别名 x2min y2min x2max y2max
1.1 labelTool工具的使用说明
BBox-Label-Tool工具实现较简单,下面为源代码的修改后的代码:
#-------------------------------------------------------------------------------
# Name: Object bounding box label tool
# Purpose: Label object bboxes for ImageNet Detection data
# Created: 06/06/2014
#
#-------------------------------------------------------------------------------
from __future__ import division
from Tkinter import *
import tkMessageBox
from PIL import Image, ImageTk
import os
import glob
import random
# Outline colors for the bounding boxes. They are indexed modulo len(COLORS),
# so the palette repeats once an image has more boxes than colors.
COLORS = ['red', 'blue', 'yellow', 'pink', 'cyan', 'green', 'black']
# Image sizes for the examples.
# NOTE(review): not referenced by any code visible in this listing.
SIZE = 256, 256
# Class names offered as one button each in the GUI; the selected name is
# stored as the first field of every bounding box that is saved.
classLabels=['mat', 'door', 'sofa', 'chair', 'table', 'bed', 'ashcan', 'shoe']
class LabelTool():
    """Tkinter GUI for drawing class-labelled bounding boxes on images.

    Images are read from ``./Images/<category>/*.jpg``.  One label file per
    image is written to ``./Labels/<category>/<image name>.txt``: the first
    line holds the number of boxes, every following line is
    ``className xmin ymin xmax ymax``.
    """

    def __init__(self, master):
        """Create all widgets inside *master* (the Tk root window)."""
        # set up the main frame
        self.parent = master
        self.parent.title("LabelTool")
        self.frame = Frame(self.parent)
        self.frame.pack(fill=BOTH, expand=1)
        self.parent.resizable(width = False, height = False)

        # initialize global state
        self.imageDir = ''
        self.imageList= []
        self.egDir = ''
        self.egList = []
        self.outDir = ''
        self.cur = 0
        self.total = 0
        self.category = 0
        self.imagename = ''
        self.labelfilename = ''
        self.tkimg = None

        # initialize mouse state
        self.STATE = {}
        self.STATE['click'] = 0
        self.STATE['x'], self.STATE['y'] = 0, 0

        # reference to bbox
        self.bboxIdList = []
        self.bboxId = None
        self.bboxList = []
        self.hl = None
        self.vl = None
        self.currentClass = ''

        # ----------------- GUI stuff ---------------------
        # dir entry & load
        self.label = Label(self.frame, text = "Image Dir:")
        self.label.grid(row = 0, column = 0, sticky = E)
        self.entry = Entry(self.frame)
        self.entry.grid(row = 0, column = 1, sticky = W+E)
        self.ldBtn = Button(self.frame, text = "Load", command = self.loadDir)
        self.ldBtn.grid(row = 0, column = 2, sticky = W+E)

        # main panel for labeling
        self.mainPanel = Canvas(self.frame, cursor='tcross')
        # The event patterns below were lost when the listing was published
        # (anything in angle brackets was stripped); they are restored here.
        self.mainPanel.bind("<Button-1>", self.mouseClick)
        self.mainPanel.bind("<Motion>", self.mouseMove)
        self.parent.bind("<Escape>", self.cancelBBox)  # press <Escape> to cancel current bbox
        self.parent.bind("s", self.cancelBBox)
        self.parent.bind("a", self.prevImage) # press 'a' to go backward
        self.parent.bind("d", self.nextImage) # press 'd' to go forward
        self.mainPanel.grid(row = 1, column = 1, rowspan = 4, sticky = W+N)

        # showing bbox info & delete bbox
        self.lb1 = Label(self.frame, text = 'Bounding boxes:')
        self.lb1.grid(row = 1, column = 2, sticky = W+N)
        self.listbox = Listbox(self.frame, width = 22, height = 12)
        self.listbox.grid(row = 2, column = 2, sticky = N)
        self.btnDel = Button(self.frame, text = 'Delete', command = self.delBBox)
        self.btnDel.grid(row = 3, column = 2, sticky = W+E+N)
        self.btnClear = Button(self.frame, text = 'ClearAll', command = self.clearBBox)
        self.btnClear.grid(row = 4, column = 2, sticky = W+E+N)

        # select class type
        self.classPanel = Frame(self.frame)
        self.classPanel.grid(row = 5, column = 1, columnspan = 10, sticky = W+E)
        label = Label(self.classPanel, text = 'class:')
        label.grid(row = 5, column = 1, sticky = W+N)
        self.classbox = Listbox(self.classPanel, width = 4, height = 2)
        self.classbox.grid(row = 5,column = 2)
        for column, name in enumerate(classLabels):
            print(name)
            # Bind the class name as a default argument so every button keeps
            # its own name; new entries in classLabels need no extra method.
            btn = Button(self.classPanel, text = name,
                         command = lambda name=name: self._selectClass(name))
            btn.grid(row = 5, column = column + 3)

        # control panel for image navigation
        self.ctrPanel = Frame(self.frame)
        self.ctrPanel.grid(row = 6, column = 1, columnspan = 2, sticky = W+E)
        self.prevBtn = Button(self.ctrPanel, text='<< Prev', width = 10, command = self.prevImage)
        self.prevBtn.pack(side = LEFT, padx = 5, pady = 3)
        self.nextBtn = Button(self.ctrPanel, text='Next >>', width = 10, command = self.nextImage)
        self.nextBtn.pack(side = LEFT, padx = 5, pady = 3)
        self.progLabel = Label(self.ctrPanel, text = "Progress: / ")
        self.progLabel.pack(side = LEFT, padx = 5)
        self.tmpLabel = Label(self.ctrPanel, text = "Go to Image No.")
        self.tmpLabel.pack(side = LEFT, padx = 5)
        self.idxEntry = Entry(self.ctrPanel, width = 5)
        self.idxEntry.pack(side = LEFT)
        self.goBtn = Button(self.ctrPanel, text = 'Go', command = self.gotoImage)
        self.goBtn.pack(side = LEFT)

        # example panel for illustration
        self.egPanel = Frame(self.frame, border = 10)
        self.egPanel.grid(row = 1, column = 0, rowspan = 5, sticky = N)
        self.tmpLabel2 = Label(self.egPanel, text = "Examples:")
        self.tmpLabel2.pack(side = TOP, pady = 5)
        self.egLabels = []
        for i in range(3):
            self.egLabels.append(Label(self.egPanel))
            self.egLabels[-1].pack(side = TOP)

        # display mouse position
        self.disp = Label(self.ctrPanel, text='')
        self.disp.pack(side = RIGHT)

        self.frame.columnconfigure(1, weight = 1)
        self.frame.rowconfigure(10, weight = 1)

        # for debugging
##        self.setImage()
##        self.loadDir()

    def loadDir(self, dbg = False):
        """Load the still-unlabelled images of the category typed in the entry.

        The entry must contain the numeric name of a sub-directory of
        ``./Images``.  Images that already have a non-empty label file in
        ``./Labels/<category>`` are skipped.
        """
        if not dbg:
            s = self.entry.get()
            self.parent.focus()
            try:
                self.category = int(s)
            except ValueError:
                # A non-numeric entry used to raise inside the Tk callback.
                tkMessageBox.showerror("Error!", message = "Image Dir must be a number, e.g. 1")
                return
        else:
            s = r'D:\workspace\python\labelGUI'
##        if not os.path.isdir(s):
##            tkMessageBox.showerror("Error!", message = "The specified dir doesn't exist!")
##            return

        # get image list
        self.imageDir = os.path.join(r'./Images', '%d' %(self.category))
        self.imageList = glob.glob(os.path.join(self.imageDir, '*.jpg'))
        if len(self.imageList) == 0:
            self.total = 0
            print('No .jpg images found in the specified dir!')
            return

        # set up output dir (makedirs also creates ./Labels itself if missing)
        self.outDir = os.path.join(r'./Labels', '%d' %(self.category))
        if not os.path.exists(self.outDir):
            os.makedirs(self.outDir)

        # drop every image that already has at least one saved box
        for label in glob.glob(os.path.join(self.outDir, '*.txt')):
            with open(label, 'r') as data:
                isEmpty = ('0\n' == data.read())
            if isEmpty:
                continue
            picture = label.replace('Labels', 'Images').replace('.txt', '.jpg')
            if picture in self.imageList:
                self.imageList.remove(picture)

        if not self.imageList:
            # Nothing left to label; loadImage() would index an empty list.
            self.total = 0
            print('All images in the specified dir are already labeled!')
            return

        # default to the 1st image in the collection
        self.cur = 1
        self.total = len(self.imageList)
        self.loadImage()
        print('%d images loaded from %s' %(self.total, s))

    @staticmethod
    def _parseLabelLine(line):
        """Parse one box line of a label file.

        Returns ``(className, x1, y1, x2, y2)`` for a five-field line,
        ``(x1, y1, x2, y2)`` for a four-field line without a class name, and
        ``None`` for anything else (blank line, non-numeric coordinate).
        """
        fields = line.split()
        if len(fields) == 5:
            prefix, coords = (fields[0],), fields[1:]
        elif len(fields) == 4:
            prefix, coords = (), fields
        else:
            return None
        try:
            return prefix + tuple(int(value) for value in coords)
        except ValueError:
            return None

    def loadImage(self):
        """Show image number ``self.cur`` and redraw its saved boxes, if any."""
        # load image
        imagepath = self.imageList[self.cur - 1]
        self.img = Image.open(imagepath)
        self.imgSize = self.img.size
        self.tkimg = ImageTk.PhotoImage(self.img)
        self.mainPanel.config(width = max(self.tkimg.width(), 400), height = max(self.tkimg.height(), 400))
        self.mainPanel.create_image(0, 0, image = self.tkimg, anchor=NW)
        self.progLabel.config(text = "%04d/%04d" %(self.cur, self.total))

        # load labels
        self.clearBBox()
        self.imagename = os.path.split(imagepath)[-1].split('.')[0]
        labelname = self.imagename + '.txt'
        self.labelfilename = os.path.join(self.outDir, labelname)
        if os.path.exists(self.labelfilename):
            with open(self.labelfilename) as f:
                for (i, line) in enumerate(f):
                    if i == 0:
                        continue  # first line only stores the number of boxes
                    bbox = self._parseLabelLine(line)
                    if bbox is None:
                        continue
                    # saveImage() writes the class name first, so the
                    # coordinates are always the last four fields.
                    x1, y1, x2, y2 = bbox[-4:]
                    self.bboxList.append(bbox)
                    tmpId = self.mainPanel.create_rectangle(x1, y1, \
                                                            x2, y2, \
                                                            width = 2, \
                                                            outline = COLORS[(len(self.bboxList)-1) % len(COLORS)])
                    self.bboxIdList.append(tmpId)
                    self.listbox.insert(END, '(%d, %d) -> (%d, %d)' %(x1, y1, x2, y2))
                    self.listbox.itemconfig(len(self.bboxIdList) - 1, fg = COLORS[(len(self.bboxIdList) - 1) % len(COLORS)])

    def saveImage(self):
        """Write the boxes of the current image to its label file."""
        if not self.labelfilename:
            return  # no image loaded yet, so there is nothing to save
        with open(self.labelfilename, 'w') as f:
            f.write('%d\n' %len(self.bboxList))
            for bbox in self.bboxList:
                f.write(' '.join(map(str, bbox)) + '\n')
        print('Image No. %d saved' %(self.cur))

    def mouseClick(self, event):
        """First click anchors a box, second click completes and records it."""
        if not self.tkimg:
            return  # no image loaded: imgSize is not defined yet
        if self.STATE['click'] == 0:
            if not self.currentClass:
                # A box without a class would produce a malformed label line.
                tkMessageBox.showerror("Error!", message = "Please select a class before drawing a box!")
                return
            self.STATE['x'], self.STATE['y'] = event.x, event.y
        else:
            width, height = self.imgSize
            x1, x2 = min(self.STATE['x'], event.x), max(self.STATE['x'], event.x)
            y1, y2 = min(self.STATE['y'], event.y), max(self.STATE['y'], event.y)
            # The canvas may be larger than the picture, so clamp all four
            # coordinates (not only the maxima) into the image.
            x1, x2 = min(max(x1, 0), width), min(max(x2, 0), width)
            y1, y2 = min(max(y1, 0), height), min(max(y2, 0), height)
            self.bboxList.append((self.currentClass, x1, y1, x2, y2))
            self.bboxIdList.append(self.bboxId)
            self.bboxId = None
            self.listbox.insert(END, '(%d, %d) -> (%d, %d)' %(x1, y1, x2, y2))
            self.listbox.itemconfig(len(self.bboxIdList) - 1, fg = COLORS[(len(self.bboxIdList) - 1) % len(COLORS)])
        self.STATE['click'] = 1 - self.STATE['click']

    def mouseMove(self, event):
        """Track the cursor: update the read-out, cross-hair and rubber-band box."""
        self.disp.config(text = 'x: %d, y: %d' %(event.x, event.y))
        if self.tkimg:
            if self.hl:
                self.mainPanel.delete(self.hl)
            self.hl = self.mainPanel.create_line(0, event.y, self.tkimg.width(), event.y, width = 2)
            if self.vl:
                self.mainPanel.delete(self.vl)
            self.vl = self.mainPanel.create_line(event.x, 0, event.x, self.tkimg.height(), width = 2)
        if 1 == self.STATE['click']:
            if self.bboxId:
                self.mainPanel.delete(self.bboxId)
            self.bboxId = self.mainPanel.create_rectangle(self.STATE['x'], self.STATE['y'], \
                                                          event.x, event.y, \
                                                          width = 2, \
                                                          outline = COLORS[len(self.bboxList) % len(COLORS)])

    def cancelBBox(self, event):
        """Abort the box that is currently being drawn."""
        if 1 == self.STATE['click']:
            if self.bboxId:
                self.mainPanel.delete(self.bboxId)
                self.bboxId = None
                self.STATE['click'] = 0

    def delBBox(self):
        """Delete the box selected in the list box (exactly one must be selected)."""
        sel = self.listbox.curselection()
        if len(sel) != 1 :
            return
        idx = int(sel[0])
        self.mainPanel.delete(self.bboxIdList[idx])
        self.bboxIdList.pop(idx)
        self.bboxList.pop(idx)
        self.listbox.delete(idx)

    def clearBBox(self):
        """Remove every box of the current image from canvas, list box and state."""
        for boxId in self.bboxIdList:
            self.mainPanel.delete(boxId)
        self.listbox.delete(0, END)
        self.bboxIdList = []
        self.bboxList = []

    def _selectClass(self, name):
        """Make *name* the class for subsequently drawn boxes and display it."""
        self.currentClass = name
        self.classbox.delete(0,END)
        self.classbox.insert(0, name)
        self.classbox.itemconfig(0,fg = COLORS[0])

    # The per-class methods are kept so existing callers keep working.
    def selectmat(self):
        self._selectClass('mat')

    def selectdoor(self):
        self._selectClass('door')

    def selectsofa(self):
        self._selectClass('sofa')

    def selectchair(self):
        self._selectClass('chair')

    def selecttable(self):
        self._selectClass('table')

    def selectbed(self):
        self._selectClass('bed')

    def selectashcan(self):
        self._selectClass('ashcan')

    def selectshoe(self):
        self._selectClass('shoe')

    def prevImage(self, event = None):
        """Save the current labels and step to the previous image."""
        if not self.imageList:
            return  # nothing loaded yet
        self.saveImage()
        if self.cur > 1:
            self.cur -= 1
            self.loadImage()

    def nextImage(self, event = None):
        """Save the current labels and step to the next image."""
        if not self.imageList:
            return  # nothing loaded yet
        self.saveImage()
        if self.cur < self.total:
            self.cur += 1
            self.loadImage()

    def gotoImage(self):
        """Jump to the 1-based image number typed into the 'Go to' entry."""
        try:
            idx = int(self.idxEntry.get())
        except ValueError:
            return  # empty or non-numeric entry: ignore the request
        if 1 <= idx and idx <= self.total:
            self.saveImage()
            self.cur = idx
            self.loadImage()

##    def setImage(self, imagepath = r'test2.png'):
##        self.img = Image.open(imagepath)
##        self.tkimg = ImageTk.PhotoImage(self.img)
##        self.mainPanel.config(width = self.tkimg.width())
##        self.mainPanel.config(height = self.tkimg.height())
##        self.mainPanel.create_image(0, 0, image = self.tkimg, anchor=NW)
if __name__ == '__main__':
    # Build the root window, attach the tool to it and enter Tk's event loop.
    root = Tk()
    tool = LabelTool(root)
    root.mainloop()
main.py
使用方法:
(1) 在BBox-Label-Tool/Images目录下创建保存图片的目录, 目录以数字命名(BBox-Label-Tool/Images/1), 然后将待标注的图片copy到1这个目录下;
(2) 在BBox-Label-Tool目录下执行命令 python main.py
(3) 在工具界面上, Image Dir 框中输入需要标记的目录名(比如 1), 然后点击load按钮, 工具自动将Images/1目录下的图片加载进来;需要说明一下, 如果目录中的图片已经标注过,点击load时不会被重新加载进来.
(4) 该工具支持多类别标注, 画bounding box框标定之前,需要先选定类别,然后再画框.
(5) 一张图片标注完后, 点击Next>>按钮, 标注下一张图片, 图片label成功后,会在BBox-Label-Tool/Labels对应的目录下生成与图片文件名对应的label文件.
注意将该文件夹保存为label名字
(github上也有另外一个开源的label-box标注工具labelImg,由于是GUI界面,操作非常方便,也可以将提取的bbox生成为txt格式文件)
2 voc数据格式
(1)Annotations中保存的是xml格式的label信息
(2)ImageSets目录下的Main目录里存放的是用于标识训练图片集和测试图片集的txt文件
(3)JPEGImages目录下存放所有图片集
(4)label目录下保存的是BBox-Label-Tool工具标注好的bounding box坐标文件,该目录下的文件就是待转换的label标签文件。
3 Label转换成VOC数据格式
BBox-Label-Tool工具标注好的bounding box坐标文件转换成VOC数据格式的形式.具体的转换过程包括了两个步骤:
(1)将BBox-Label-Tool下的txt格式保存的bounding box信息转换成VOC数据格式下以xml方式表示;
(2)生成用于训练的数据集和用于测试的数据集。
首先建立一个VOC2007文件夹,在其下面建立'JPEGImages'、'Annotations'、'label'文件夹,将步骤1中生成的所有txt文件放到该label文件夹下,并将所有的图片转移到JPEGImages文件夹下。
建立createXml.py 完成txt到xml的转换脚本, 放到和label文件夹同一目录下,执行脚本python createXml.py,生成xml。
#!/usr/bin/env python
import os
import sys
import cv2
from itertools import islice
from xml.dom.minidom import Document
# Directory that is walked for the .txt label files to convert.
labels='label'
# Prefix prepended to a picture file name to locate the image to read.
imgpath='JPEGImages/'
# Prefix prepended to the name of every .xml file that is written.
xmlpath_new='Annotations/'
# Text stored in the <folder> element and in the <source>/<annotation>
# element of every generated XML file.
foldername='VOC2007'
def insertObject(doc, datas):
    """Build the VOC ``<object>`` element for one labelled bounding box.

    doc   -- xml.dom.minidom Document used to create the nodes.
    datas -- sequence ``[className, xmin, ymin, xmax, ymax]``; the four
             coordinates are converted with str(), and a trailing line
             ending on the last field is removed.

    Returns the new element.  It is not attached to the document; the
    caller appends it where needed.
    """
    def addTextChild(parent, tag, text):
        # Append <tag>text</tag> to parent.
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(text))
        parent.appendChild(node)

    obj = doc.createElement('object')
    addTextChild(obj, 'name', datas[0])
    addTextChild(obj, 'pose', 'Unspecified')
    addTextChild(obj, 'truncated', str(0))
    addTextChild(obj, 'difficult', str(0))

    bndbox = doc.createElement('bndbox')
    addTextChild(bndbox, 'xmin', str(datas[1]))
    addTextChild(bndbox, 'ymin', str(datas[2]))
    addTextChild(bndbox, 'xmax', str(datas[3]))
    # The last field may still carry the line ending of the label file.
    # rstrip() copes with '\n', '\r', '\r\n' and with an empty field, where
    # indexing [-1] used to raise IndexError.
    addTextChild(bndbox, 'ymax', str(datas[4]).rstrip('\r\n'))
    obj.appendChild(bndbox)
    return obj
def _newAnnotationDoc(pictureName, imgSize):
    """Return ``(doc, annotation)`` holding the VOC header for one image.

    pictureName -- file name stored in the <filename> element.
    imgSize     -- ``(height, width, depth)`` as given by ``img.shape``.
    """
    doc = Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    def addTextChild(parent, tag, text):
        # Append <tag>text</tag> to parent.
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(text))
        parent.appendChild(node)

    addTextChild(annotation, 'folder', foldername)
    addTextChild(annotation, 'filename', pictureName)

    source = doc.createElement('source')
    addTextChild(source, 'database', 'My Database')
    addTextChild(source, 'annotation', foldername)
    addTextChild(source, 'image', 'flickr')
    addTextChild(source, 'flickrid', 'NULL')
    annotation.appendChild(source)

    owner = doc.createElement('owner')
    addTextChild(owner, 'flickrid', 'NULL')
    addTextChild(owner, 'name', 'idaneel')
    annotation.appendChild(owner)

    size = doc.createElement('size')
    addTextChild(size, 'width', str(imgSize[1]))
    addTextChild(size, 'height', str(imgSize[0]))
    addTextChild(size, 'depth', str(imgSize[2]))
    annotation.appendChild(size)

    addTextChild(annotation, 'segmented', str(0))
    return doc, annotation


def create():
    """Convert every .txt label file under ``labels`` into a VOC .xml file.

    For ``<name>.txt`` the picture ``imgpath + <name>.jpg`` is read to get
    the image size, and ``xmlpath_new + <name>.xml`` is written.  Label
    files without any valid box, and label files whose picture cannot be
    read, are skipped.
    """
    if not os.path.isdir(xmlpath_new):
        os.makedirs(xmlpath_new)

    for root, _dirs, files in os.walk(labels):
        for each in files:
            if not each.endswith('.txt'):
                continue

            # Collect the valid boxes first, so that no XML file is opened
            # for a label file that turns out to contain none.
            boxes = []
            with open(os.path.join(root, each), 'r') as fidin:
                # the first line only holds the number of boxes
                for data in islice(fidin, 1, None):
                    datas = data.rstrip('\r\n').split(' ')
                    if 5 != len(datas) or '' in datas:
                        print('bounding box information error')
                        continue
                    boxes.append(datas)
            if not boxes:
                continue

            pictureName = each.replace('.txt', '.jpg')
            # Read the picture once per label file, not once per box.
            img = cv2.imread(imgpath + pictureName)
            if img is None:
                # imread signals an unreadable/missing file by returning None
                print('cannot read image %s, %s skipped' % (imgpath + pictureName, each))
                continue

            doc, annotation = _newAnnotationDoc(pictureName, img.shape)
            for datas in boxes:
                annotation.appendChild(insertObject(doc, datas))

            xmlName = each.replace('.txt', '.xml')
            with open(xmlpath_new + xmlName, "w") as f:
                f.write(doc.toprettyxml(indent = ' '))
if __name__ == '__main__':
    # Run the txt -> xml conversion when the file is executed as a script.
    create()
新建文件夹,命名为ImageSets,在ImageSets里再新建文件夹,命名为Main。我们可以通过xml名字,生成四个txt文件,即利用createTest.py 生成训练集和测试集标识文件。
main文件夹下会生成test.txt和train.txt,val.txt以及trainval.txt文件。test.txt是测试集,train.txt是训练集,val.txt是验证集,trainval.txt是训练和验证集.VOC2007中,trainval大概是整个数据集的50%,test也大概是整个数据集的50%;
train大概是trainval的50%,val大概是trainval的50%
将生成的Annotations、JPEGImages、ImageSets替换掉VOC2007中对应的同名文件夹,制作完成。
参考博客:http://www.cnblogs.com/objectDetect/p/5780006.html
http://www.voidcn.com/blog/sinat_30071459/article/p-5745727.html