这个版本不同于之前的写法:我采用离线(线下)方式对照片进行合成,并同步修改json标注文件。训练和测试效果都非常好,能够大幅度提升分数,只是写起来比较繁琐。
这种方法是把提取出来的前景物体按照0.5的权重与背景照片混合。它不同于mixup:加入的前景不带自身的背景,所以效果会更好。这种混合后的照片人眼看起来并不是很舒服,但是能给训练带来很明显的涨分。
第一步,你需要提取训练时AP最低的几个类别。这里提取的是单独的物体,不包含背景,也就是说你需要根据json文件或者xml文件中bbox的位置把物体裁剪出来,裁剪结果就跟上图所示一样:小的部分作为前景,之后我们还需要再找一部分照片作为背景。背景我选择的仍然是AP比较低的类别的照片,这些照片同样需要从全部的xml信息中提取出来,因为后面要把新添加的前景的bbox位置加入到背景的标注之中。这里我选择修改json文件,因为xml比较难操作。
在提取前景之前,你需要先提取出你想要的低AP类别的全部照片。下面这段代码既可以提取jpg文件,也可以提取xml文件,只需要简单地修改一下,就能提取出我们想要的低AP类别。
import os
import numpy as np
from pycocotools.coco import COCO
import random
import cv2
# Collect the file names of every image that contains at least one object of
# a low-AP category. The result is left in `name` (a list of file names in
# the order returned by `coco.getImgIds()`).

# Category ids of the low-AP classes to extract.
LOW_AP_CATEGORY_IDS = {2, 6, 20, 28, 36, 37}

coco = COCO('./coco/all.json')
ids1 = coco.getAnnIds()
ids2 = coco.getImgIds()

# Image id of every annotation whose category is one of the low-AP classes
# (an image appears once per matching annotation here).
items = [
    ann['image_id']
    for ann in coco.loadAnns(ids1)
    if ann['category_id'] in LOW_AP_CATEGORY_IDS
]

# De-duplicate. A set gives O(1) membership tests in the image loop below;
# `item` keeps the sorted, duplicate-free list form.
wanted_image_ids = set(items)
item = sorted(wanted_image_ids)
print('++++++++++++++++++')

name = [
    img['file_name']
    for img in coco.loadImgs(ids2)
    if img['id'] in wanted_image_ids
]
print(name)
import os
import shutil
# Move the XML annotation file of every selected image (file names in `name`)
# from `xml_train` into `xml_val`. Files already present in the destination
# are skipped, so the step can be re-run.
xml_train = './coco/xml'
xml_val = './coco/yuanxml/'

# List the destination once instead of rescanning it on every iteration.
already_moved = set(os.listdir(xml_val))

for i, image_name in enumerate(name):
    # splitext only strips the final extension, so image names that contain
    # additional dots keep their full stem.
    random_file = os.path.splitext(image_name)[0] + '.xml'
    source_file = "%s/%s" % (xml_train, random_file)
    print(i)
    if random_file not in already_moved:
        shutil.move(source_file, xml_val)
        already_moved.add(random_file)
接下来就是把低AP类别的物体单独截取下来。这个过程需要利用xml文件获取物体的位置,然后截取并保存。这里我用label.txt文件来存储类别信息,后面把前景加入到背景之中时,需要据此对背景的json文件进行修改,添加对应的标注信息。
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
import cv2
import os
# Cut every object of a wanted category out of its source image and save it
# as a stand-alone foreground patch, recording "<index> <class name>" in
# ./data/label.txt so the class of each patch can be recovered later.
#
# NOTE(review): `imread` and `cate` are not defined in the visible part of
# this file; they are assumed to be provided elsewhere (an image loader and
# the collection of wanted class names) -- confirm before running.
file = os.listdir('./data/aaa/')
xml = os.listdir('./data/nnnxml/')

# Resize the patches that already exist in ./data/aaa/ to 150x150 and store
# the copies in ./data/resize/. This only sees patches from a previous run
# of the cropping loop below.
for i, crop_name in enumerate(file):
    print(i)
    a = imread('./data/aaa/' + crop_name)
    b = cv2.resize(a, (150, 150))
    # Channel swap before cv2.imwrite, which writes BGR; presumably `imread`
    # returns RGB -- TODO confirm.
    b = cv2.cvtColor(b, cv2.COLOR_BGR2RGB)
    path = './data/resize/' + crop_name
    cv2.imwrite(path, b)

# Open the label file once for the whole run so the handle is closed reliably.
with open('./data/label.txt', 'a+') as filen:
    for i, xml_name in enumerate(xml):
        root = ET.parse('./data/nnnxml/' + xml_name).getroot()
        # The source image is the one that shares the XML file's stem, not an
        # arbitrary entry of the ./data/aaa/ listing.
        imgfile = './data/nnn/' + os.path.splitext(xml_name)[0] + '.jpg'
        imgdata = None
        for member in root.findall('object'):
            obj_name = member.findtext('name')
            if obj_name not in cate:
                continue
            # Read the box by tag name so the result does not depend on the
            # order of the coordinate elements inside the XML.
            xmin, ymin, xmax, ymax = (
                int(member.findtext('.//' + tag))
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
            value = (obj_name, xmin, ymin, xmax, ymax)
            print(value)
            if imgdata is None:
                # Load the image once per XML file, on first use.
                print(imgfile)
                imgdata = imread(imgfile)
            # Image arrays are indexed [row, column], i.e. [y, x].
            newimg = imgdata[ymin:ymax, xmin:xmax, :]
            if newimg.size == 0:
                # Degenerate box: nothing to save and nothing to label.
                continue
            newimg = cv2.cvtColor(newimg, cv2.COLOR_BGR2RGB)
            # One patch per XML index: a later object of the same file
            # overwrites an earlier one, and its label line is the last one
            # written for this index.
            pa = './data/aaa/' + str(i) + '.jpg'
            cv2.imwrite(pa, newimg)
            # Write the label only after the patch exists so the two stay in
            # sync.
            filen.write(str(i) + ' ' + obj_name + '\n')
这一个步骤就是利用之前得到的前景和背景进行融合。因为照片大小不一,所以我设置了两种粘贴尺寸。这个是目前最初的基础版本,效果已经十分出色。我中间又创建了一个文件来保存label。为什么需要两个label文件?因为第一个保存的是前景的类别,第二个保存的是背景与加入的前景对应之后的类别;os.listdir返回的顺序不固定,所以这样才能保证照片和label一一对应。
# Blend one foreground patch from ./data/aaa/ into each background image from
# ./data/ji/ with weight 0.5, save the result to ./data/new2/, and record
# "<background stem> <class name>" in ./data/nnn.txt.
#
# NOTE(review): `imread` is not defined in the visible part of this file; it
# is assumed to be an image loader provided elsewhere -- confirm.
filea = os.listdir('./data/aaa/')
fileb = os.listdir('./data/ji/')

# Read label.txt once: "<patch stem> <class name>" per line. Later lines for
# the same stem override earlier ones.
fg_labels = {}
with open('./data/label.txt', 'r') as filen:
    for line in filen:
        parts = line.split(' ')
        if len(parts) > 1:
            # Drop the line terminator so it is not copied into nnn.txt.
            fg_labels[parts[0]] = parts[1].rstrip('\r\n')

with open('./data/nnn.txt', 'a+') as label_out:
    # zip stops at the shorter listing, so a background without a matching
    # foreground is left out instead of raising IndexError.
    for fg_name, bg_name in zip(filea, fileb):
        dd = fg_name.split('.')[0]
        cls = fg_labels.get(dd, '')
        print(cls)
        if not cls:
            # Without a class the pasted patch cannot be annotated later.
            print('no label found for foreground ' + fg_name + ', skipped')
            continue
        # Stem = text before the first dot; this is the key under which the
        # pasted class is looked up again when nnn.txt is read.
        yiyi = bg_name.split('.')[0]

        a = imread('./data/aaa/' + fg_name)
        b = imread('./data/ji/' + bg_name)
        rows, cols = b.shape[:2]
        # The whole background is scaled by 0.5; the patch is added on top
        # with the same weight.
        c = b * 0.5
        if rows > 350 and cols > 350:
            # Large background: 250x250 patch at rows/columns 100..349.
            a = cv2.resize(a, (250, 250))
            c[100:350, 100:350, :] = c[100:350, 100:350, :] + a * 0.5
        else:
            # Small background: 50x50 patch in the top-left corner.
            a = cv2.resize(a, (50, 50))
            c[:50, :50, :] = c[:50, :50, :] + a * 0.5
        # Reverse the channel order before cv2.imwrite.
        c = c[:, :, ::-1]
        pa = './data/new2/' + bg_name
        cv2.imwrite(pa, c)
        # Record the label only once the blended image has been written.
        label_out.write(yiyi + ' ' + cls + '\n')
前一步只是完成了照片融合,json文件并没有改变,所以我们还需要在每张照片的标注中添加新增前景的位置和类别。
import os
import cv2
import json
import xml.dom.minidom
import xml.etree.ElementTree as ET
# Build a COCO-style annotation file (./data/new.json) for the blended
# images: the boxes from each image's XML file plus one extra box for the
# pasted foreground patch, whose class is read from ./data/nnn.txt.

# Root directory containing the image folder and the XML folder (adjust this
# path to your own layout).
data_dir = './data/'
image_file_dir = os.path.join(data_dir, 'new')
xml_file_dir = os.path.join(data_dir, 'jixml')
annotations_info = {'images': [], 'annotations': [], 'categories': []}
categories_map = {'一次性快餐盒':1,'书籍纸张':2, '充电宝':3,'剩饭剩菜':4, '包':5,'垃圾桶':6,
                  '塑料器皿':7,'塑料玩具':8, '塑料衣架':9, '大骨头':10,'干电池':11,
                  '快递纸袋':12,'插头电线':13, '旧衣服':14,'易拉罐':15}
annotations_info['categories'].extend(
    {"id": category_id, "name": key} for key, category_id in categories_map.items()
)


def _tag_values(collection, tag):
    """Return the text of every element named `tag` under `collection`."""
    return [node.firstChild.data for node in collection.getElementsByTagName(tag)]


# Read nnn.txt once: "<image stem> <class name>" per line. Later lines for the
# same stem override earlier ones; blank lines are ignored.
pasted_labels = {}
with open('./data/nnn.txt', 'r') as filen:
    for line in filen:
        parts = line.split(' ')
        if len(parts) > 1:
            pasted_labels[parts[0]] = parts[1].split('\n')[0]

# Stem = text before the first dot, which is also the key used in nnn.txt.
file_names = [image_file_name.split('.')[0]
              for image_file_name in os.listdir(image_file_dir)]
ann_id = 1
for i, file_name in enumerate(file_names):
    print(i)
    image_file_name = file_name + '.jpg'
    print(file_name)
    xml_file_name = file_name + '.xml'
    image_file_path = os.path.join(image_file_dir, image_file_name)
    xml_file_path = os.path.join(xml_file_dir, xml_file_name)
    # Fixed per image up front so every annotation below, including the
    # pasted one, refers to the current image.
    image_id = i + 1

    image = cv2.imread(image_file_path)
    if image is None:
        # cv2.imread returns None instead of raising on an unreadable file.
        raise FileNotFoundError('cannot read image: ' + image_file_path)
    height, width = image.shape[:2]
    image_info = {'file_name': image_file_name, 'id': image_id,
                  'height': height, 'width': width}
    annotations_info['images'].append(image_info)

    DOMTree = xml.dom.minidom.parse(xml_file_path)
    collection = DOMTree.documentElement
    names = _tag_values(collection, 'name')
    xmins = _tag_values(collection, 'xmin')
    ymins = _tag_values(collection, 'ymin')
    xmaxs = _tag_values(collection, 'xmax')
    ymaxs = _tag_values(collection, 'ymax')

    # Boxes that were already annotated on the background image.
    for obj_name, xmin, ymin, xmax, ymax in zip(names, xmins, ymins, xmaxs, ymaxs):
        if obj_name not in categories_map:
            continue
        x1, y1, x2, y2 = int(xmin), int(ymin), int(xmax), int(ymax)
        # Keep the inclusive far corner inside the image.
        if x2 == width:
            x2 -= 1
        if y2 == height:
            y2 -= 1
        x, y = x1, y1
        w, h = x2 - x1 + 1, y2 - y1 + 1
        category_id = categories_map[obj_name]
        area = w * h
        annotation_info = {"id": ann_id, "image_id": image_id, "bbox": [x, y, w, h],
                           "category_id": category_id, "area": area, "iscrowd": 0}
        annotations_info['annotations'].append(annotation_info)
        ann_id += 1

    # Box of the pasted foreground patch. Images without an entry in nnn.txt
    # fall back to the default class '书籍纸张'.
    d = pasted_labels.get(file_name, '书籍纸张')
    print(d)
    category_id = categories_map[d]
    if width > 350 and height > 350:
        x1, y1, x2, y2 = 100, 100, 350, 350
    else:
        x1, y1, x2, y2 = 0, 0, 50, 50
    # The bounds are exclusive slice ends (rows/columns x1..x2-1), so the
    # size is x2 - x1 without the +1 used for inclusive XML corners.
    x2 = min(x2, width)
    y2 = min(y2, height)
    x, y = x1, y1
    w, h = x2 - x1, y2 - y1
    area = w * h
    annotation_info = {"id": ann_id, "image_id": image_id, "bbox": [x, y, w, h],
                       "category_id": category_id, "area": area, "iscrowd": 0}
    annotations_info['annotations'].append(annotation_info)
    ann_id += 1

with open('./data/new.json', 'w') as f:
    json.dump(annotations_info, f, indent=4)
print('---整理后的标注文件---')
print('所有图片的数量:', len(annotations_info['images']))
print('所有标注的数量:', len(annotations_info['annotations']))
print('所有类别的数量:', len(annotations_info['categories']))