Before anything else you need to install pycocotools. A lot of people online say a plain sudo pip install won't work and that you have to clone the COCO API from GitHub and build the Python bindings yourself, but when I tried that route I ran into problems. What actually worked for me in the end was installing cython first and then running sudo pip install pycocotools. (I've heard Windows isn't supported, but I haven't tried it myself, so I can't say.) Look up the pycocotools installation instructions for your own setup, since everyone's environment is different.
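For reference, these are the two commands that worked for me (prefix with sudo if your environment needs it):

pip install cython
pip install pycocotools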
A disclaimer: the steps below may be somewhat tedious to run, but they are reliable. The environment is Python 2; if you want to use Python 3, you will need to adjust the code and some of the library calls.
First download the COCO 2017 dataset. Since I'm using the darknet framework, I only used train2017 (118,287 images) and val2017 (5,000 images). You can merge the test and validation sets and treat them together as a test set (I recommend using the Karpathy split here, which re-partitions the data into train/val/test); evaluation should work out a bit better that way.
Convert the COCO instances_train2017.json / instances_val2017.json labels into VOC-style (.xml) labels.
Here is the code that converts the COCO JSON labels into VOC XML labels.
from pycocotools.coco import COCO
import os
import shutil
from tqdm import tqdm
import skimage.io as io
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageDraw
savepath = "/home/test/darknet/VOC2020/"
datasets_list = ['val2017']  # after this pass finishes, change to 'train2017' and run again
img_dir = savepath + 'images/'  # processed images are copied here; this pass only handles val2017, so rename the generated images folder to val2017 afterwards
anno_dir = savepath + 'annotations/'  # an annotations folder holding the xml files is created here; rename it after the run as well
classes_names =['person','bicycle', 'car','motorcycle','airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog','horse', 'sheep','cow','elephant','bear', 'zebra', 'giraffe','backpack','umbrella', 'handbag','tie', 'suitcase', 'frisbee', 'skis', 'snowboard','sports ball', 'kite', 'baseball bat', 'baseball glove','skateboard', 'surfboard', 'tennis racket','bottle', 'wine glass', 'cup', 'fork','knife', 'spoon', 'bowl', 'banana','apple', 'sandwich', 'orange','broccoli', 'carrot', 'hot dog', 'pizza','donut', 'cake', 'chair', 'couch', 'potted plant', 'bed','dining table', 'toilet','tv','laptop', 'mouse','remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
dataDir = '/home/test/darknet/coco2017'  # path to your COCO dataset
headstr = """\
VOC
%s
NULL
company
%d
%d
%d
0
"""
objstr = """\
"""
tailstr = '''\
'''
def mkr(path):
    # recreate the directory from scratch so stale files never linger
    if os.path.exists(path):
        shutil.rmtree(path)
        os.mkdir(path)
    else:
        os.mkdir(path)

mkr(img_dir)
mkr(anno_dir)
def id2name(coco):
    # map COCO category id -> category name
    classes = dict()
    for cls in coco.dataset['categories']:
        classes[cls['id']] = cls['name']
    return classes
def write_xml(anno_path, head, objs, tail):
    f = open(anno_path, "w")
    f.write(head)
    for obj in objs:
        f.write(objstr % (obj[0], obj[1], obj[2], obj[3], obj[4]))
    f.write(tail)
    f.close()  # close so the xml is flushed to disk
def save_annotations_and_imgs(coco, dataset, filename, objs):
    # write the xml next to a copy of the image
    anno_path = anno_dir + filename[:-3] + 'xml'
    print('anno_path:%s' % anno_path)
    #img_path = dataDir + '/' + 'images' + '/' + dataset + '/' + filename
    img_path = dataDir + '/' + dataset + '/' + filename
    print('img_path:%s' % img_path)
    print('step3-image-path-OK')
    dst_imgpath = img_dir + filename
    img = cv2.imread(img_path)
    '''if (img.shape[2] == 1):
        print(filename + " not a RGB image")
        return'''
    print('img_path:%s' % img_path)
    print('dst_imgpath:%s' % dst_imgpath)
    shutil.copy(img_path, dst_imgpath)
    head = headstr % (filename, img.shape[1], img.shape[0], img.shape[2])
    tail = tailstr
    write_xml(anno_path, head, objs, tail)
def showimg(coco, dataset, img, classes, cls_id, show=True):
    global dataDir
    #I = Image.open('%s/%s/%s/%s' % (dataDir, 'images', dataset, img['file_name']))
    I = Image.open('%s/%s/%s' % (dataDir, dataset, img['file_name']))  # adjust this path to match your directory layout if needed
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=cls_id, iscrowd=None)
    anns = coco.loadAnns(annIds)
    objs = []
    for ann in anns:
        class_name = classes[ann['category_id']]
        if class_name in classes_names:
            print(class_name)
            if 'bbox' in ann:
                # COCO boxes are [x, y, width, height]; VOC wants corner coordinates
                bbox = ann['bbox']
                xmin = int(bbox[0])
                ymin = int(bbox[1])
                xmax = int(bbox[2] + bbox[0])
                ymax = int(bbox[3] + bbox[1])
                obj = [class_name, xmin, ymin, xmax, ymax]
                objs.append(obj)
                #draw = ImageDraw.Draw(I)
                #draw.rectangle([xmin, ymin, xmax, ymax])
    #if show:
    #    plt.figure()
    #    plt.axis('off')
    #    plt.imshow(I)
    #    plt.show()
    return objs
for dataset in datasets_list:
    annFile = '{}/annotations_1/instances_{}.json'.format(dataDir, dataset)  # path to where you put the json files
    print('annFile:%s' % annFile)
    coco = COCO(annFile)
    '''
    expected console output:
    loading annotations into memory...
    Done (t=0.81s)
    creating index...
    index created!
    '''
    classes = id2name(coco)
    print("classes:%s" % classes)
    classes_ids = coco.getCatIds(catNms=classes_names)
    print(classes_ids)
    for cls in classes_names:
        cls_id = coco.getCatIds(catNms=[cls])
        img_ids = coco.getImgIds(catIds=cls_id)
        print(cls, len(img_ids))
        #imgIds = img_ids[0:10]
        for imgId in tqdm(img_ids):
            img = coco.loadImgs(imgId)[0]
            filename = img['file_name']
            #print(filename)
            objs = showimg(coco, dataset, img, classes, classes_ids, show=False)
            #print(objs)
            save_annotations_and_imgs(coco, dataset, filename, objs)
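For reference, given the header and object templates above, a generated XML should look roughly like this (file name and coordinates here are made up for illustration):

<annotation>
    <folder>VOC</folder>
    <filename>000000000139.jpg</filename>
    <source>
        <database>NULL</database>
    </source>
    <owner>
        <name>company</name>
    </owner>
    <size>
        <width>640</width>
        <height>426</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>person</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>100</xmin>
            <ymin>200</ymin>
            <xmax>300</xmax>
            <ymax>400</ymax>
        </bndbox>
    </object>
</annotation>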
There are plenty of demos for this online, but I spent a whole afternoon debugging them one by one before finding that this one actually works. Some bloggers just copy other people's code wholesale: it's riddled with bugs, the paths are a tangled, overlapping mess they never bothered to clean up, and worst of all they claim it works without ever having run it. It made me want to slap someone. But I'm not here just to rant; my point is that if you post code, either don't post it at all, or post code you've actually run and add some comments. Posting broken code misleads people, wastes their time, and puts a lot of pressure on them.
Once we have both the train and val XML files, we use scripts to convert the XML into txt. This has two parts.
First, collect the paths of all the XML files into train_all.txt.
Originally I planned to read every image path in train2017 into train_all.txt, but there's a catch: reading from the images gives 118,287 paths, while the XML side only gives 117,266, about a thousand fewer. The same thing happens with val2017: 5,000 images, but only 4,852 are generated. The most likely explanation is that images containing no annotated objects never get an XML file in the first step, so they drop out here.
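If you want to check this yourself, here is a minimal sketch (the annFile path follows my layout; adjust it to yours) that counts the images with no annotation records at all:

from pycocotools.coco import COCO

annFile = '/home/test/darknet/coco2017/annotations_1/instances_train2017.json'
coco = COCO(annFile)
all_ids = set(coco.getImgIds())
# image ids that appear in at least one annotation record
annotated_ids = set(ann['image_id'] for ann in coco.dataset['annotations'])
print('total images: %d' % len(all_ids))
print('images without any annotation: %d' % len(all_ids - annotated_ids))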
This assumes you have already put train2017 and val2017 under the JPEGImages folder.
import os
from os import getcwd

wd = getcwd()
mulu = ['/' + 'annotations_train_xml', '/' + 'annotations_val_xml']
count = 0
for i in mulu:
    count += 1
    dir = wd + i
    print(dir)
    filenames = os.listdir(dir)
    if count == 1:
        f = open('train_all.txt', 'w')
        count_1 = 0
        for filename in filenames:
            count_1 += 1
            out_path = dir + '/' + filename.replace('xml', 'jpg')
            out_path = out_path.replace('annotations_train_xml', 'JPEGImages/train2017')
            f.write(out_path + '\n')
        f.close()
        print('done!,total:%s' % count_1)
    elif count == 2:
        f = open('val_all.txt', 'w')
        count_1 = 0
        for filename in filenames:
            count_1 += 1
            out_path = dir + '/' + filename.replace('xml', 'jpg')
            out_path = out_path.replace('annotations_val_xml', 'JPEGImages/val2017')
            f.write(out_path + '\n')
        f.close()
        print('done!,total:%s' % count_1)
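After this runs, each line of train_all.txt should be an absolute image path. Assuming you run the script from /home/test/darknet/VOC2020, a line would look like this (the file name is just an example):

/home/test/darknet/VOC2020/JPEGImages/train2017/000000391895.jpg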
Then organize the generated files together and produce the txt label corresponding to each xml.
Again, run this for train first, then change train to val and run it again; the paths to edit are the three places marked with ######.
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
#20190227@new-only 2007 data
#sets=[('2007', 'train'), ('2007', 'val'), ('2007_test', 'test')]
sets =['train']
#classes = ['1', '2', '3','4','5','6','7','8','9','10','11', '12', '13','14','15','16','17','18','19','20']
classes = ['person','bicycle', 'car','motorcycle','airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog','horse', 'sheep','cow','elephant','bear', 'zebra', 'giraffe','backpack','umbrella', 'handbag','tie', 'suitcase', 'frisbee', 'skis', 'snowboard','sports ball', 'kite', 'baseball bat', 'baseball glove','skateboard', 'surfboard', 'tennis racket','bottle', 'wine glass', 'cup', 'fork','knife', 'spoon', 'bowl', 'banana','apple', 'sandwich', 'orange','broccoli', 'carrot', 'hot dog', 'pizza','donut', 'cake', 'chair', 'couch', 'potted plant', 'bed','dining table', 'toilet','tv','laptop', 'mouse','remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
def convert(size, box):
    # box comes in as (xmin, xmax, ymin, ymax); size is (width, height)
    dw = 1. / (size[0])
    dh = 1. / (size[1])
    x = (box[0] + box[1]) / 2.0 - 1
    y = (box[2] + box[3]) / 2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return (x, y, w, h)
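# A quick sanity check of convert() (numbers made up for illustration):
# for a 640x480 image with a box xmin=100, xmax=300, ymin=200, ymax=400,
#     convert((640, 480), (100., 300., 200., 400.))
# gives x = ((100+300)/2 - 1)/640 = 0.3109375
#       y = ((200+400)/2 - 1)/480 = 0.6229...
#       w = (300-100)/640         = 0.3125
#       h = (400-200)/480         = 0.4166...
# i.e. the YOLO format: box center and size, all normalized to [0, 1].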
def convert_annotation(image_id):
    # for the val pass, swap these two paths for the annotations_val_xml /
    # annotations_val_txt versions, as in the commented line below
    #in_file = open('/home/test/darknet/VOC2020/annotations_val_xml/%s.xml' % (image_id))
    in_file = open('/home/test/darknet/VOC2020/annotations_train_xml/%s.xml' % (image_id))  ##########
    out_file = open('/home/test/darknet/VOC2020/annotations_train_txt/%s.txt' % (image_id), 'w')  ##########
    tree = ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
    in_file.close()
    out_file.close()

#wd = getcwd()
wd = " "
for image_set in sets:
    image_ids = open('train_all.txt').read().strip().split()  ########## change to val_all.txt for the val pass
    #image_ids = open('%s.txt' % (image_set)).read().strip().split()
    print("start ")
    #list_file = open('%s.txt' % (image_set), 'w')
    for image_id in image_ids:
        #list_file.write('%s/%s.jpg\n' % (wd, image_id))
        # strip the path and extension to recover the bare image id
        id = image_id.split('/')[-1].replace('jpg', 'xml')
        id = id.split('.')[0]
        print('id:%s' % id)
        convert_annotation(id)
    #list_file.close()
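Each resulting txt file then holds one line per object in the form "class_id x_center y_center width height" (all normalized). Continuing the made-up numbers from the convert() example above, a person box (class index 0) would come out as something like:

0 0.3109375 0.6229166666666667 0.3125 0.4166666666666667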
This script could actually generate the train_all.txt from the first step directly, but regenerating that file on every debug run got tedious, so I changed it.
Finally, put the generated txt labels under VOC2020/labels/train2017 and labels/val2017, and you can start training darknet.
Remember to change voc.names to coco.names, and to update the class count and the number of filters in the convolutional layer right before each yolo layer: COCO also has 80 classes, so the filter count works out to (80 + 5) * 3 = 255.
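As a rough sketch, the relevant part of each yolo block in your .cfg should end up looking like this (other fields such as anchors stay as in your base config):

[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear

[yolo]
classes=80
num=9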
This post is also worth reading alongside: https://blog.csdn.net/weixin_42731241/article/details/81352013