代码:jwyang/faster-rcnn.pytorch,具体流程可以【参考】使用faster-rcnn.pytorch训练自己数据集(完整版),这里讲一下遇到的问题
DOTA 数据集的 label 是存在 txt 里的,需要将 txt 转为 xml 格式(/FPN_Tensorflow_Rotation/data/io/DOTA/data_crop.py 已经将 DOTA 数据集转为 VOC 格式,images 和 labels 放在生成的 /DOTA1.0/train-800 中。因此只需要将 train-800 放入 Faster R-CNN 代码指定的 data 位置,然后将 images 重命名为 JPEGImages,labels 重命名为 Annotations)。
因为 DOTA 中的某些目标太小,因此先选大目标进行训练。采用以下方法:
# 选择目标体积大的图片
import os
import shutil
import xml.etree.ElementTree as ET
def voc_selcet(ann_filepath, img_filepath, img_savepath, ann_savepath,
               min_area=100000, img_ext='.png'):
    """Copy every image (and its annotation) that contains a large object.

    An image is selected when at least one of its ``<object>`` boxes has an
    axis-aligned extent of ``min_area`` pixels or more. Each selected image is
    copied and counted once, no matter how many large objects it contains.

    Args:
        ann_filepath: Directory holding the source ``.xml`` annotations.
        img_filepath: Directory holding the source images.
        img_savepath: Destination directory for selected images
            (created, including parents, if missing).
        ann_savepath: Destination directory for selected annotations
            (created, including parents, if missing).
        min_area: Minimum box area, in pixels, for an object to be "large".
        img_ext: Extension of the image that belongs to each annotation.

    Returns:
        The number of images that were selected and copied.

    Raises:
        FileNotFoundError: If the image for a selected annotation is missing.
    """
    # makedirs (not mkdir) so nested targets such as .../VOC2007/Annotations
    # work even when the parent directories do not exist yet.
    os.makedirs(img_savepath, exist_ok=True)
    os.makedirs(ann_savepath, exist_ok=True)

    large_img_num = 0
    for file in sorted(os.listdir(ann_filepath)):
        # splitext keeps dots inside the base name intact.
        stem, ext = os.path.splitext(file)
        if ext.lower() != '.xml':
            # Ignore stray files instead of crashing in the XML parser.
            continue

        root = ET.parse(os.path.join(ann_filepath, file)).getroot()
        if not _has_large_object(root, min_area):
            continue

        print('a large img:', file)
        large_img_num += 1
        img_name = stem + img_ext
        shutil.copy(os.path.join(ann_filepath, file),
                    os.path.join(ann_savepath, file))
        shutil.copy(os.path.join(img_filepath, img_name),
                    os.path.join(img_savepath, img_name))

    print('large image num:', large_img_num)
    return large_img_num


def _has_large_object(root, min_area):
    """Return True if any <object> under root has a box area >= min_area."""
    for obj in root.findall('object'):
        bbox = obj.find('bndbox')
        if bbox is None:
            continue
        x_min, y_min, x_max, y_max = _bbox_extent(bbox)
        # Inclusive pixel extent, hence the +1 on each side length.
        if (x_max - x_min + 1) * (y_max - y_min + 1) >= min_area:
            return True
    return False


def _bbox_extent(bbox):
    """Return (x_min, y_min, x_max, y_max) for a <bndbox> element.

    Handles both the axis-aligned tags (xmin/ymin/xmax/ymax) and the
    four-corner tags (x0,y0 ... x3,y3). Whichever layout the XML files use
    must also be the one read by
    faster-rcnn.pytorch/lib/datasets/pascal_voc.py (around line 236).
    """
    if bbox.find('xmin') is not None:
        return tuple(float(bbox.find(tag).text)
                     for tag in ('xmin', 'ymin', 'xmax', 'ymax'))
    xs = [float(bbox.find('x%d' % i).text) for i in range(4)]
    ys = [float(bbox.find('y%d' % i).text) for i in range(4)]
    return min(xs), min(ys), max(xs), max(ys)
if __name__ == '__main__':
    # Read from the full converted dataset (VOCdevkit2007_all) and write the
    # filtered subset into the directory the training code reads from.
    voc_selcet(
        ann_filepath='/home/test_user/faster-rcnn/data/VOCdevkit2007_all/VOC2007/Annotations/',
        img_filepath='/home/test_user/faster-rcnn/data/VOCdevkit2007_all/VOC2007/JPEGImages/',
        img_savepath='/home/test_user/faster-rcnn/data/VOCdevkit2007/VOC2007/JPEGImages/',
        ann_savepath='/home/test_user/faster-rcnn/data/VOCdevkit2007/VOC2007/Annotations/',
    )
    print('done!')
接下来,自己制作 trainval.txt 等文件,里面放的是待训练图片名(如果 ImageSets/Main 目录不存在,写文件时会报“没有这个文件”的错误,需要先新建该目录)。
from sklearn.model_selection import train_test_split
import os
# Build the ImageSets/Main split files (trainval/test/val/train) from the
# image names found in JPEGImages.
name_path = r'./VOCdevkit2007/VOC2007/JPEGImages'
split_dir = './VOCdevkit2007/VOC2007/ImageSets/Main'

name_list = os.listdir(name_path)
# Image ids are the file names without extension. They are sorted because
# os.listdir returns entries in arbitrary order, which would otherwise make
# the seeded split differ between machines.
names = sorted(os.path.splitext(i)[0] for i in name_list)

# random_state (not shuffle, which is only an on/off flag) pins the shuffle
# so the same split is produced on every run.
trainval, test = train_test_split(names, test_size=0.5, random_state=10)
val, train = train_test_split(trainval, test_size=0.5, random_state=10)

# Create the output directory up front; open(..., 'w') does not create
# missing parent directories.
os.makedirs(split_dir, exist_ok=True)
for split_name, ids in (('trainval', trainval), ('test', test),
                        ('val', val), ('train', train)):
    with open(os.path.join(split_dir, split_name + '.txt'), 'w') as fw:
        fw.writelines(i + '\n' for i in ids)
print('done!')
最后,需要修改faster-rcnn.pytorch/lib/datasets/pascal_voc.py
中的类名,如下:
# Original PASCAL VOC class list, kept commented out for reference:
# self._classes = ('__background__', # always index 0
# 'aeroplane', 'bicycle', 'bird', 'boat',
# 'bottle', 'bus', 'car', 'cat', 'chair',
# 'cow', 'diningtable', 'dog', 'horse',
# 'motorbike', 'person', 'pottedplant',
# 'sheep', 'sofa', 'train', 'tvmonitor')
# Replacement class list with the 15 DOTA categories. The '__background__'
# entry is left untouched as the first element.
self._classes = ('__background__', # always index 0
'roundabout', 'tennis-court', 'swimming-pool', 'storage-tank', 'soccer-ball-field',
'small-vehicle', 'ship', 'plane', 'large-vehicle', 'helicopter',
'harbor', 'ground-track-field', 'bridge', 'basketball-court', 'baseball-diamond')
注意:不要改动 background!!!
训练:
CUDA_VISIBLE_DEVICES=0 python trainval_net.py --dataset pascal_voc --net vgg16 --checkepoch 10 --cuda
测试:
python test_net.py --dataset pascal_voc --net vgg16 --checkepoch 10 --checkpoint 368 --cuda
解决faster-rcnn中训练时assert(boxes[:,2]>=boxes[:,0]).all()的问题
找到了2种解决办法,我是通过 【解决方法1】 中的这段代码修改成功的:
# Read the image width/height from the annotation's <size> element so the
# boxes can be clamped to the image.
wh = tree.find('size')
w, h = int(wh.find('width').text), int(wh.find('height').text)
for ix, obj in enumerate(objs):
bbox = obj.find('bndbox')
# Coordinates are read as-is; no "- 1" shift is applied in this snippet.
x1 = float(bbox.find('xmin').text)
y1 = float(bbox.find('ymin').text)
x2 = float(bbox.find('xmax').text)
y2 = float(bbox.find('ymax').text)
# Clamp the box: lower bound 0, upper bound the image width/height.
# NOTE(review): if the downstream code treats coordinates as 0-based pixel
# indexes, the upper bounds may need to be w - 1 / h - 1 -- confirm against
# how the boxes are consumed (e.g. horizontal flipping).
x1 = max(x1, 0)
y1 = max(y1, 0)
x2 = min(x2, w)
y2 = min(y2, h)
【解决方法1】Training error. bg_num_rois = 0 and fg_num_rois = 0, this should not happen!
【解决方法2】Training error. bg_num_rois = 0 and fg_num_rois = 0, this should not happen!
如果是在运行 test_net.py 时报的错,那产生这个问题的主要原因一般是训练和测试时的某些参数不一致。需要检查 parse_args() 函数等一些和参数设置有关的地方。
我的错误是因为 trainval_net.py 和 test_net.py 中,'ANCHOR_SCALES' 的数目不一致导致的,检查两个文件中,这一部分是否一样。
# Dataset-specific settings for pascal_voc: imdb names plus the anchor
# configuration passed through set_cfgs.
# The ANCHOR_SCALES / ANCHOR_RATIOS lists here must be the same in the
# training script and in test_net.py; a differing number of scales is what
# caused the test-time error described in the surrounding text.
if args.dataset == "pascal_voc":
args.imdb_name = "voc_2007_trainval"
args.imdbval_name = "voc_2007_test"
args.set_cfgs = ['ANCHOR_SCALES', '[2, 4, 8, 16]', 'ANCHOR_RATIOS', '[0.5,1,2]']
【解决方法1】Check the anchor scales at training and demo.py , check the classes in demo.py code
【解决方法2】ERROR: pytorch1.0 test size mismatch #518