标签(空格分隔): Linux 深度学习
前面,我们已经成功跑起来faster-rcnn,并且自己动手训练了PascalVOC2007图像集,但是笔者接下来继续沿着官方文件跑PascalVOC2012,训练到第二阶段的某个点,程序就开始报错退出。查看logs发现,找不到图像的Annotation文件。遂放弃,转而生成自己的图像集。
图像收集
这里笔者需要先完成毕业设计,所以图像集设计的种类不多,也就三类。毕竟PascalVOC一开始也只有四类呢!看了这么多经验帖,总结需要注意以下几个问题:文件名不要大写,后缀只能是.jpg或.JPEG,图像不要长得太过“瘦长”。这里为了重新命名收集来的文件,笔者自己写了一段Python脚本,变量命名注释极不规范,而且并不能识别某些后缀是jpg但并不是jpg的“假图”,大家看看就好,勿喷。
# -*-coding:utf-8-*-
import os
from PIL import Image
import argparse
'''
This script was designed to rename JPEG images downloaded from different websites. Files are renamed
to the format <class index><5-digit counter>.jpg, e.g. 000001.jpg
'''
def readimage(FileDir):
    """Return the names of the regular files directly inside ``FileDir``.

    Sub-directories are skipped and only the bare file names (not full
    paths) are returned.

    The names are returned in sorted order: ``os.listdir`` yields entries in
    arbitrary order, and callers that number the files one by one would
    otherwise get a different numbering on every run.

    Args:
        FileDir: path of the directory to list.

    Returns:
        A sorted list of file names.
    """
    return sorted(
        name for name in os.listdir(FileDir)
        if os.path.isfile(os.path.join(FileDir, name))
    )
def RenameFiles(FileDir, count=1):
    """Rename the JPEG files in ``FileDir`` to ``<class index><counter>.jpg``.

    The class index is taken from the directory name (``kiss`` -> 0,
    ``hug`` -> 1, ``hands`` -> 2) and the counter is zero-padded to five
    digits, e.g. ``000001.jpg`` for the first image of ``kiss``.

    Processing stops at the first file whose extension is not ``.jpg`` /
    ``.jpeg`` (case-insensitive), and also as soon as a target name is
    already taken by another file, so existing images are never overwritten.

    Args:
        FileDir: directory holding the images. Only its last path component
            is used to pick the class, so ``kiss``, ``./kiss`` and
            ``/data/kiss/`` are all accepted.
        count: number given to the first renamed image; it is incremented
            for every JPEG file that is processed.

    Raises:
        ValueError: if the directory name is not one of the known classes.
    """
    class_index = {'kiss': 0, 'hug': 1, 'hands': 2}
    extlist = ('.jpg', '.jpeg')

    # normpath drops a trailing separator so that 'kiss/' still maps to 'kiss'.
    dirname = os.path.basename(os.path.normpath(FileDir))
    if dirname not in class_index:
        raise ValueError(
            "Unknown dataset directory %r, expected one of %s"
            % (FileDir, sorted(class_index)))
    clsnum = class_index[dirname]

    # Sort here as well so the numbering is reproducible regardless of the
    # order in which the directory listing is returned.
    for image in sorted(readimage(FileDir)):
        extension = os.path.splitext(image)[1].lower()
        if extension not in extlist:
            print("Exist non-jpeg files: " + image)
            break

        # %05d pads like the original '0000' + str(count) slice did, but does
        # not truncate (and thereby collide) once count exceeds 99999.
        newimagename = '%d%05d.jpg' % (clsnum, count)
        count = count + 1
        print(newimagename)

        oldname = os.path.join(FileDir, image)
        newname = os.path.join(FileDir, newimagename)
        if oldname == newname:
            # Already carries the right name (e.g. the script was re-run).
            continue
        if os.path.exists(newname):
            # os.rename may silently replace an existing file; refuse instead.
            print("Target already exists, not overwriting: " + newimagename)
            break
        os.rename(oldname, newname)
def parse_args():
    """Parse the command-line arguments of the rename script.

    Options:
        -set    dataset directory to process (stored as ``dataset``). It is
                mandatory: without it there is nothing to rename, so argparse
                reports a usage error instead of passing ``None`` on.
        -count  first number to assign (stored as ``countnum``, int,
                default 0).

    Returns:
        The ``argparse.Namespace`` with ``dataset`` and ``countnum``.
    """
    parser = argparse.ArgumentParser(description='Rename demo')
    parser.add_argument('-set', dest='dataset', help='choose dataset',
                        required=True)
    parser.add_argument('-count', dest='countnum',
                        help='start number is set to 0',
                        default=0, type=int)
    return parser.parse_args()
if __name__ == "__main__":
    # Script entry point: a start number of 0 (the default) is rejected so
    # that the user has to pick the first counter value explicitly.
    args = parse_args()
    if args.countnum != 0:
        RenameFiles(args.dataset, args.countnum)
    else:
        print("please change countnum")
图像标定
这里采用的是labelImg的标定工具,发动同学和学姐学弟一起帮我标定2333~~~
图像分类
这里我们需要产生和PascalVOC一样格式的txt文件,这里笔者事先研究过了,产生是有点规律的,具体见我的上一篇博客:折腾faster-rcnn(二)--数据篇
然后是几个集合的比例问题:
一般情况下trainval占总数据集的50%,test占总数据集的50%;train占trainval的50%,val占trainval的50%。上面所占百分比可根据自己的数据集修改,如果数据集比较少,test和val可少一些。这个在生成以上四个文件的脚本里面可以调比例,暂时就用默认比例试试,跑跑。生成这几个文件的脚本如下:
# -*-coding:utf-8-*-
import os
import random
import FileOperate as fo
'''
Generate txt files in directory VOC2012/ImageSet
'''
def GenerateSet(abspath, imgsetdir):
    """Randomly split the images into trainval/train/val/test list files.

    The entries returned by ``fo.readfile`` for ``abspath/imgsetdir`` are
    stripped of their extension and split at random: ``rate[0]`` of them go
    to trainval and the rest to test; ``rate[1]`` of trainval goes to train
    and the rest to val (``rate[2]`` and ``rate[3]`` are not used).

    Two families of files are then written through ``fo.createtext``:
      * ``trainval.txt``, ``train.txt``, ``val.txt`` and ``test.txt`` with
        the full split, written with flag 2;
      * ``<class>_<split>.txt`` for every class, written one image per call.
        For trainval/train/val the flag is 1 for the class equal to the part
        of the image name before the first ``_`` and -1 for the others; any
        name matching neither ``kiss`` nor ``hug`` is counted as ``hands``.
        For test the flag is always 0.

    NOTE(review): the exact file format and the append/overwrite behaviour
    depend on ``FileOperate.createtext``, which is not visible here.

    Args:
        abspath: base directory of the dataset.
        imgsetdir: name of the image directory below ``abspath``.
    """
    stems = [os.path.splitext(entry)[0]
             for entry in fo.readfile(os.path.join(abspath, imgsetdir))]

    rate = [0.5, 0.5, 0.5, 0.5]
    clsset = ['kiss', 'hug', 'hands']
    txtset = ['trainval.txt', 'train.txt', 'val.txt', 'test.txt']

    # First cut: trainval versus test, chosen by random index.
    total = len(stems)
    trainvalposset = set(random.sample(xrange(total), int(total * rate[0])))
    testposset = set(xrange(total)) - trainvalposset
    trainvalset = [stems[pos] for pos in trainvalposset]
    testset = [stems[pos] for pos in testposset]

    # Second cut: train versus val inside trainval.
    subtotal = len(trainvalset)
    trainposset = set(random.sample(xrange(subtotal), int(subtotal * rate[1])))
    valposset = set(xrange(subtotal)) - trainposset
    trainset = [trainvalset[pos] for pos in trainposset]
    valset = [trainvalset[pos] for pos in valposset]

    # Plain split lists.
    for list_name, members in zip(txtset, (trainvalset, trainset, valset, testset)):
        fo.createtext(list_name, members, 2)

    # Per-class lists for the labelled splits: 1 marks the image's own class,
    # -1 every other class.
    for list_name, members in zip(txtset[:3], (trainvalset, trainset, valset)):
        for stem in members:
            imgname = [stem]
            prefix = stem.split("_")[0]
            if prefix == clsset[0]:
                positive = 0
            elif prefix == clsset[1]:
                positive = 1
            else:
                positive = 2
            for idx, cls in enumerate(clsset):
                flag = 1 if idx == positive else -1
                fo.createtext(cls + "_" + list_name, imgname, flag)

    # Per-class lists for the test split carry the flag 0 for every class.
    for stem in testset:
        imgname = [stem]
        for cls in clsset:
            fo.createtext(cls + "_" + txtset[3], imgname, 0)
if __name__ == "__main__":
    # Script entry point: split the images found in ./JPEGImages, resolved
    # against the absolute path of the current working directory.
    GenerateSet(os.path.abspath('.'), 'JPEGImages')
建立图像库
将以上几步搞好后,我们需要仿照PascalVOC2007的结构建立自己的图像库,目录结构如下所示
VOCdevkit2007
|-results
|  |-VOC2007
|     |-Main         #空目录,用来存放test集结果
|-VOC2007
   |-Annotations     #标定文件xml
   |-ImageSets       #txt文件存放至此
   |-JPEGImages      #jpg图像存放至此
修改模型
这一步肯定是需要的,因为PascalVOC有20类,所以我们需要根据自己的需求稍微修改神经网络参数。所有参数都在`/models`下,分为`coco`和`pascal_voc`格式,每种格式又有两到三种网络模型,每种模型还有两种训练方式:Alternative training(alt-opt)和Approximate joint training(end-to-end)。总之,由于各种原因,本文以Alternative training+ZF model为例。笔者的需求是四类,还有一类在这里叫`__background__`类,一共五类。
要修改的文件:
1.py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/stage1_rpn_train.pt
# Python input layer (roi_data_layer.layer.RoIDataLayer) with tops data, im_info and gt_boxes.
layer {
name: 'input-data'
type: 'Python'
top: 'data'
top: 'im_info'
top: 'gt_boxes'
python_param {
module: 'roi_data_layer.layer'
layer: 'RoIDataLayer'
param_str: "'num_classes': 5" # change to match your training set: number of classes + 1
}
}
2.py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt
# Python input layer (roi_data_layer.layer.RoIDataLayer) with tops data, rois, labels and the bbox targets/weights.
layer {
name: 'data'
type: 'Python'
top: 'data'
top: 'rois'
top: 'labels'
top: 'bbox_targets'
top: 'bbox_inside_weights'
top: 'bbox_outside_weights'
python_param {
module: 'roi_data_layer.layer'
layer: 'RoIDataLayer'
param_str: "'num_classes': 5" # change to match your training set: number of classes + 1
}
}
# InnerProduct layer on top of fc7 whose output size must follow the number of classes.
layer {
name: "cls_score"
type: "InnerProduct"
bottom: "fc7"
top: "cls_score"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
inner_product_param {
num_output: 5 # change to match your training set: number of classes + 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
# InnerProduct layer on top of fc7 whose output size is four values per class.
layer {
name: "bbox_pred"
type: "InnerProduct"
bottom: "fc7"
top: "bbox_pred"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
inner_product_param {
num_output: 20 # change to match your training set: (number of classes + 1) * 4, i.e. four box coordinates per class
weight_filler {
type: "gaussian"
std: 0.001
}
bias_filler {
type: "constant"
value: 0
}
}
}
3.py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/stage2_rpn_train.pt
# Python input layer (roi_data_layer.layer.RoIDataLayer) with tops data, im_info and gt_boxes.
layer {
name: 'input-data'
type: 'Python'
top: 'data'
top: 'im_info'
top: 'gt_boxes'
python_param {
module: 'roi_data_layer.layer'
layer: 'RoIDataLayer'
param_str: "'num_classes': 5" # change to match your training set: number of classes + 1
}
}
4.py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt
# Python input layer (roi_data_layer.layer.RoIDataLayer) with tops data, rois, labels and the bbox targets/weights.
layer {
name: 'data'
type: 'Python'
top: 'data'
top: 'rois'
top: 'labels'
top: 'bbox_targets'
top: 'bbox_inside_weights'
top: 'bbox_outside_weights'
python_param {
module: 'roi_data_layer.layer'
layer: 'RoIDataLayer'
param_str: "'num_classes': 5" # change to match your training set: number of classes + 1
}
}
# InnerProduct layer on top of fc7 whose output size must follow the number of classes.
layer {
name: "cls_score"
type: "InnerProduct"
bottom: "fc7"
top: "cls_score"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
inner_product_param {
num_output: 5 # change to match your training set: number of classes + 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
# InnerProduct layer on top of fc7 whose output size is four values per class.
layer {
name: "bbox_pred"
type: "InnerProduct"
bottom: "fc7"
top: "bbox_pred"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
inner_product_param {
num_output: 20 # change to match your training set: (number of classes + 1) * 4, i.e. four box coordinates per class
weight_filler {
type: "gaussian"
std: 0.001
}
bias_filler {
type: "constant"
value: 0
}
}
}
5.py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/faster_rcnn_test.pt
# Test-time InnerProduct layer on top of fc7; output size must match the training prototxt.
layer {
name: "cls_score"
type: "InnerProduct"
bottom: "fc7"
top: "cls_score"
inner_product_param {
num_output: 5 # change to match your training set: number of classes + 1
}
}
# Test-time InnerProduct layer on top of fc7; output size must match the training prototxt.
layer {
name: "bbox_pred"
type: "InnerProduct"
bottom: "fc7"
top: "bbox_pred"
inner_product_param {
num_output: 20 # change to match your training set: (number of classes + 1) * 4
}
}
注意,注意,这里文件夹进行了切换,并不属于模型的参数而是属于支持库的改动:
6.py-faster-rcnn/lib/datasets/pascal_voc.py
class pascal_voc(imdb):
def __init__(self, image_set, year, devkit_path=None):
imdb.__init__(self, 'voc_' + year + '_' + image_set)
self._year = year
self._image_set = image_set
self._devkit_path = self._get_default_path() if devkit_path is None \
else devkit_path
self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year)
self._classes = ('__background__', # always index 0
'kiss', 'hug', 'hands','person')# list exactly your own classes here; the first entry (index 0) must always be __background__
7.py-faster-rcnn/lib/datasets/imdb.py
def append_flipped_images(self):
    """Append a horizontally mirrored copy of every roidb entry.

    For each image the x-coordinates of its boxes (columns 0 and 2) are
    mirrored against the image width, and a new entry marked
    ``'flipped': True`` is appended to ``self.roidb``; ``gt_overlaps`` and
    ``gt_classes`` are reused from the source entry. Finally
    ``self._image_index`` is doubled so that it lines up with the roidb.

    Image widths are read by opening every image file with PIL.
    """
    num_images = self.num_images
    widths = []
    for idx in xrange(num_images):
        widths.append(PIL.Image.open(self.image_path_at(idx)).size[0])

    for idx in xrange(num_images):
        width = widths[idx]
        mirrored = self.roidb[idx]['boxes'].copy()
        # Keep the untouched x1/x2 columns: both are needed after the first
        # assignment below has already overwritten column 0.
        left = mirrored[:, 0].copy()
        right = mirrored[:, 2].copy()
        mirrored[:, 0] = width - right - 1
        mirrored[:, 2] = width - left - 1
        assert (mirrored[:, 2] >= mirrored[:, 0]).all()
        entry = {
            'boxes': mirrored,
            'gt_overlaps': self.roidb[idx]['gt_overlaps'],
            'gt_classes': self.roidb[idx]['gt_classes'],
            'flipped': True,
        }
        self.roidb.append(entry)

    self._image_index = self._image_index * 2
8.根目录执行`./experiments/scripts/faster_rcnn_alt_opt.sh 0 ZF pascal_voc`,训练和测试一气呵成,并且还有日志记录。如果只需要训练,那么执行(无日志记录):
./tools/train_faster_rcnn_alt_opt.py --gpu 0 --net_name ZF --weights data/imagenet_models/ZF.v2.caffemodel --imdb voc_2007_trainval --cfg experiments/cfgs/faster_rcnn_alt_opt.yml
Q&A:
1.如果标签含有大写字母,那么修改py-faster-rcnn/lib/datasets/pascal_voc.py第209行:
cls = self._class_to_ind[obj.find('name').text.lower().strip()]
2.出现错误:
TypeError: 'NoneType' object has no attribute '__getitem__'
这是因为某些图像根本不是jpeg图像,只是后缀被改动了,这一下被学姐坑惨了。建议大家训练前统统用图转工具将图像转换一下格式,以绝后患。