在学会YOLO目标检测后第一次参加这样的比赛,特此做个记录,同时也提出一些改进的方案。比赛链接:ICME-2022 安全AI挑战者计划第九期:小样本商标检测挑战赛-天池大赛-阿里云天池 (aliyun.com)。比赛规则比较严格:首先,只能使用ImageNet-1K的预训练模型,而YOLO官方只提供基于COCO的预训练模型;其次,不允许使用模型融合(即对多个模型的预测结果取并集),想冲排名的话需要注意。阅读本文需要具备一定的YOLO检测基础,具体的训练命令和检测命令在此省略。
import os.path as osp
import numpy as np
from PIL import Image
import seaborn as sns
from matplotlib import pyplot as plt
import glob
# Placeholder for the dataset root directory; replace with a real path.
root = "数据集根目录"
# Images are looked up in the "images" sub-directory of the root.
img_dir = osp.join(root, 'images')
# Every entry found directly inside the images directory.
img_paths = glob.glob(img_dir + '/*')
def get_img_size(img_path):
    """Return the ``(width, height)`` of the image stored at *img_path*.

    The image is opened inside a ``with`` block so the underlying file
    handle is released immediately; when scanning a whole dataset this
    avoids keeping one open file per image.
    """
    with Image.open(img_path) as image:
        w, h = image.size
    return w, h
# Collect the width, height and width/height ratio of every image.
w_list, h_list, rat_list = [], [], []
for img in img_paths:
    w, h = get_img_size(img)
    # Without these appends the histograms below would be drawn from empty lists.
    w_list.append(w)
    h_list.append(h)
    rat_list.append(w / h)

# One histogram per statistic, side by side.
f, ax = plt.subplots(1, 3, figsize=(16, 4))
sns.histplot(w_list, ax=ax[0], palette=sns.light_palette("seagreen", as_cmap=True)).set_title('Width')
sns.histplot(h_list, ax=ax[1], palette=sns.color_palette("RdPu", 10)).set_title('Height')
sns.histplot(rat_list, ax=ax[2], palette=sns.color_palette("RdPu", 10)).set_title('W&H Ratio')
bbox 小目标(area<%3)占比 统计
| class | small object ratio |
| 冰墩墩 | 0.35398230088495575 |
| Sanyo/三洋 | 1.0 |
| Eifini/伊芙丽 | 1.0 |
| PSALTER/诗篇 | 0.9636363636363636 |
| Beaster | 0.84375 |
| ON/昂跑 | 0.9878048780487805 |
| BYREDO/柏芮朵 | 0.9568965517241379 |
| Ubras | 0.98 |
| Eternelle | 0.6326530612244898 |
| PERFECT DIARY/完美日记 | 0.8918918918918919 |
| 花西子 | 0.9897959183673469 |
| Clarins/娇韵诗 | 0.9734513274336283 |
| L'occitane/欧舒丹 | 0.9716312056737588 |
| Versace/范思哲 | 0.8235294117647058 |
| Mizuno/美津浓 | 0.7520661157024794 |
| Lining/李宁 | 0.95 |
| DOUBLE STAR/双星 | 0.5416666666666666 |
| YONEX/尤尼克斯 | 0.8475609756097561 |
| Tory Burch/汤丽柏琦 | 0.9105691056910569 |
| Gucci/古驰 | 0.9432624113475178 |
| Louis Vuitton/路易威登 | 0.9702970297029703 |
| CARTELO/卡帝乐鳄鱼 | 0.7894736842105263 |
| JORDAN | 0.828125 |
| KENZO | 0.8148148148148148 |
| UNDEFEATED | 0.8936170212765957 |
| BOY LONDON | 0.6715328467153284 |
| TREYO/雀友 | 0.9081632653061225 |
| carhartt | 0.9514563106796117 |
| 洁柔 | 0.9771241830065359 |
| Blancpain/宝珀 | 1.0 |
| GXG | 1.0 |
| 乐町 | 1.0 |
| Diadora/迪亚多纳 | 0.38271604938271603 |
| TUCANO/啄木鸟 | 0.6031746031746031 |
| Loewe | 0.9120879120879121 |
| Granite Gear | 0.9813084112149533 |
| DESCENTE/迪桑特 | 1.0 |
| OSPREY | 0.8968253968253969 |
| Swatch/斯沃琪 | 0.9466666666666667 |
| erke/鸿星尔克 | 0.8674698795180723 |
| Massimo Dutti | 0.9807692307692307 |
| PINKO | 0.8390804597701149 |
| PALLADIUM | 0.9441624365482234 |
| origins/悦木之源 | 0.9767441860465116 |
| Trendiano | 1.0 |
| 音儿 | 1.0 |
| Monster Guardians | 0.9891304347826086 |
| 敷尔佳 | 0.8620689655172413 |
| IPSA/茵芙莎 | 0.9777777777777777 |
| Schwarzkopf/施华蔻 | 0.954954954954955 |
| all | 0.8899438093392753 |
# refer: https://github.com/CarryHJR/LogDet/blob/master/LogDetMini/eda/eda.ipynb
# NOTE(review): `COCO` (presumably pycocotools.coco.COCO) and `pd` (pandas) are
# not imported in the visible part of this file -- confirm they are in scope.
from collections import Counter

from prettytable import PrettyTable

root = "json文件所在的根目录"
json_path = osp.join(root, 'instances_train2017.json')
# Loading may raise on this annotation file; if it does, add encoding='utf-8'
# to the json-loading statement inside the COCO class.
coco = COCO(json_path)

# A box counts as "small" when it covers less than this fraction of its image.
# Adjust the threshold here.
SMALL_AREA_THRESHOLD = 0.03


def _small_object_ratio(anns, imgs, threshold=SMALL_AREA_THRESHOLD):
    """Return the fraction of *anns* whose relative box area is below *threshold*.

    The relative area is (box width / image width) * (box height / image height).
    Returns NaN when *anns* is empty so that a category without annotations
    does not crash the statistics.
    """
    if not anns:
        return float('nan')
    areas = np.array([
        (ann['bbox'][2] / imgs[ann['image_id']]['width'])
        * (ann['bbox'][3] / imgs[ann['image_id']]['height'])
        for ann in anns
    ])
    return (areas < threshold).sum() / areas.shape[0]


print('bbox 小目标(area<%3)占比 统计')
class_names = []
wh_ratios_cls = []
# Per-category ratio of small objects.
for cat_id in coco.cats:
    cat_anns = [coco.anns[ann_id] for ann_id in coco.getAnnIds(catIds=[cat_id])]
    class_names.append(coco.cats[cat_id]['name'])
    wh_ratios_cls.append(_small_object_ratio(cat_anns, coco.imgs))
# Overall ratio across every annotation, reported as the final "all" row.
class_names.append('all')
wh_ratios_cls.append(_small_object_ratio(list(coco.anns.values()), coco.imgs))

# Build and show the small-object ratio table.
table_data = PrettyTable()
table_data.add_column('class', class_names)
table_data.add_column('small object ratio', wh_ratios_cls)
print(table_data)

# Build the aspect-ratio (long side / short side) distribution figure.
# Degenerate boxes with a zero-length side are skipped to avoid dividing by zero.
bbox_wh = [
    round(max(ann['bbox'][2], ann['bbox'][3]) / min(ann['bbox'][2], ann['bbox'][3]), 0)
    for ann in coco.anns.values()
    if min(ann['bbox'][2], ann['bbox'][3]) > 0
]
bbox_wh_counter = Counter(bbox_wh)
# Sorted so that the first k entries are the k smallest ratios.
bbox_wh_unique = sorted(bbox_wh_counter)
bbox_wh_count = [bbox_wh_counter[i] for i in bbox_wh_unique]
k = 10
wh_df = pd.DataFrame(bbox_wh_count[:k], index=bbox_wh_unique[:k])
wh_df.plot(kind='bar')
YOLOv5的源码要求数据集的标注为txt格式,源码团队也提供了转换方法:ultralytics/JSON2YOLO: Convert JSON annotations into YOLO format. (github.com)。此处我实现的代码如下:
# Convert a COCO-format dataset into a YOLO-format dataset.
# Placeholder: path of the COCO annotation json file.
json_path = 'json文件的地址'
# Placeholder: directory where the generated txt label files are saved.
save_path = 'txt文件保存地址'
def convert(size, box, ndigits=6):
    """Convert a COCO box to a normalised YOLO box.

    Args:
        size: ``(image_width, image_height)`` in pixels.
        box: COCO box ``[x_min, y_min, width, height]`` in pixels.
        ndigits: Number of decimals kept in each returned value.

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the
        corresponding image dimension and rounded to *ndigits* decimals.
    """
    img_w, img_h = size[0], size[1]
    # COCO stores the top-left corner; YOLO expects the box centre.
    x_center = box[0] + box[2] / 2.0
    y_center = box[1] + box[3] / 2.0
    return (
        round(x_center / img_w, ndigits),
        round(y_center / img_h, ndigits),
        round(box[2] / img_w, ndigits),
        round(box[3] / img_h, ndigits),
    )
if __name__ == '__main__':
    from collections import defaultdict

    json_file = json_path  # COCO "Object Instance" style annotation file
    ana_txt_save_path = save_path  # output directory for the txt labels
    with open(json_file, 'r', encoding='utf-8') as f_json:
        data = json.load(f_json)
    # Create the output directory when it does not exist yet.
    os.makedirs(ana_txt_save_path, exist_ok=True)

    # Category ids in the json start at 1, so they are remapped to 0-based
    # indices here and the class names are stored in classes.txt (saving the
    # file is optional). Without the remapping the class count would be 51,
    # because id 0 would be an empty class.
    id_map = {}
    with open(os.path.join(ana_txt_save_path, 'classes.txt'), 'w', encoding='utf-8') as f:
        for i, category in enumerate(data['categories']):
            f.write("{}\n".format(category['name']))
            id_map[category['id']] = i

    # Group the annotations by image once instead of rescanning the whole
    # annotation list for every image.
    anns_by_image = defaultdict(list)
    for ann in data['annotations']:
        anns_by_image[ann['image_id']].append(ann)

    for img in tqdm(data['images']):
        filename = img["file_name"]
        img_width = img["width"]
        img_height = img["height"]
        img_id = img["id"]
        head, tail = os.path.splitext(filename)
        ana_txt_name = head + ".txt"  # label file shares the image's base name
        # An image without annotations still gets an (empty) label file.
        with open(os.path.join(ana_txt_save_path, ana_txt_name), 'w') as f_txt:
            for ann in anns_by_image.get(img_id, ()):
                box = convert((img_width, img_height), ann["bbox"])
                f_txt.write("%s %s %s %s %s\n" % (id_map[ann["category_id"]], box[0], box[1], box[2], box[3]))
# The training set needs to be augmented, and train/val ...
# (NOTE(review): the original comment is cut off at this point.)
# Placeholder for the dataset root directory; replace with a real path.
root_path = "数据集根目录"
# Names of the image and label sub-directories under the root.
image_p = "images"
label_p = "labels"
image_path = os.path.join(root_path, image_p)
label_path = os.path.join(root_path, label_p)
# Collect the base names of the images/labels that belong to one class.
def find_same_class(label_path, id):
    """Return the base names of label files whose first token equals *id*.

    Args:
        label_path: Directory containing the YOLO ``.txt`` label files.
        id: Class id to look for, as a string or an int.

    Returns:
        Alphabetically sorted list of file names without extension. Only the
        first token of each file (the class id of its first box) is compared.
    """
    wanted = str(id)
    result = []
    # Sorted so the result does not depend on the directory listing order.
    for name in sorted(os.listdir(label_path)):
        if not name.endswith('.txt'):
            continue
        # errors='ignore' keeps a non-label text file in the same directory
        # from aborting the scan with a decode error.
        with open(os.path.join(label_path, name), "r", encoding='utf-8', errors='ignore') as f:
            first_token = f.read().split(' ')[0]
        if first_token == wanted:
            result.append(os.path.splitext(name)[0])
    return result
# Rename every image/label pair to the form "<class>_<index>".
def change_file_name(num_classes=50):
    """Rename images and labels to ``<class>_<index>.jpg`` / ``.txt``.

    For each class id in ``range(num_classes)`` the files returned by
    ``find_same_class`` are numbered from 0 in the order returned.

    Args:
        num_classes: Number of class ids to process.

    Raises:
        FileExistsError: If a target name already exists, instead of
            overwriting that file.
    """
    labels_dir = os.path.join(root_path, label_p)
    # Resolve every class first so that the renames below cannot change
    # which files a later class lookup sees.
    class_list = [find_same_class(labels_dir, str(i)) for i in tqdm(range(num_classes))]
    for k, stems in enumerate(tqdm(class_list)):
        for n, stem in enumerate(stems):
            for folder, ext in ((image_path, ".jpg"), (label_path, ".txt")):
                src = os.path.join(folder, stem + ext)
                dst = os.path.join(folder, "{}_{}{}".format(k, n, ext))
                if src == dst:
                    continue
                # os.rename replaces an existing target silently on POSIX,
                # which would destroy another sample.
                if os.path.exists(dst):
                    raise FileExistsError(dst)
                os.rename(src, dst)
# Image rotation; only 90, 180 and 270 degrees are supported.
def image_rotation(path=image_path, angle=90):
    """Write a rotated copy of every ``<class>_<index>`` image in *path*.

    Only files whose name splits into exactly two parts on ``'_'`` are
    processed, so copies produced by an earlier call (``..._rot90.jpg``)
    are skipped. The copy is saved next to the source as
    ``<name>_rot<angle>.jpg``.

    Args:
        path: Directory containing the images.
        angle: Clockwise rotation in degrees; one of 90, 180, 270.

    Raises:
        ValueError: If *angle* is not 90, 180 or 270.
    """
    rotate_codes = {
        90: cv2.ROTATE_90_CLOCKWISE,
        180: cv2.ROTATE_180,
        270: cv2.ROTATE_90_COUNTERCLOCKWISE,
    }
    if angle not in rotate_codes:
        raise ValueError("angle must be 90, 180 or 270, got {}".format(angle))
    for img in tqdm(os.listdir(path)):
        if len(img.split('_')) != 2:
            continue
        temp = cv2.imread(os.path.join(path, img))
        if temp is None:
            # cv2.imread returns None when the file cannot be read as an image.
            continue
        res_name = img.split('.')[0] + "_rot{}.jpg".format(angle)
        cv2.imwrite(os.path.join(path, res_name), cv2.rotate(temp, rotate_codes[angle]))
def rotation(x, y, angle):
    """Rotate a normalised point clockwise around the image centre.

    Args:
        x: Normalised x coordinate (0 = left edge, 1 = right edge).
        y: Normalised y coordinate (0 = top edge, 1 = bottom edge).
        angle: Clockwise rotation in degrees.

    Returns:
        ``(nx, ny)`` as strings, rounded to 6 decimals so floating-point
        noise from the trigonometric functions does not leak into the labels.
    """
    # Move the origin to the image centre and flip y so the axes follow the
    # usual mathematical orientation (y pointing up).
    pt = np.array([x - 0.5, -1 * (y - 0.5)])
    ang = np.pi * angle / 180
    alpha = np.cos(ang)
    beta = np.sin(ang)
    # Clockwise rotation matrix for y-up axes.
    M = np.array([[alpha, beta], [-beta, alpha]], dtype=float)
    nx, ny = M @ pt
    # Back to image coordinates; "+ 0.0" turns a possible -0.0 into 0.0.
    nx = round(float(nx) + 0.5, 6) + 0.0
    ny = round(-1 * float(ny) + 0.5, 6) + 0.0
    return str(nx), str(ny)
# Width and height must be exchanged after a 90 or 270 degree rotation.
def switch_p(w, h):
    """Return the pair swapped, i.e. ``(h, w)``."""
    return h, w
# Rotate the label coordinates (x, y, w, h).
def label_rotation(path=label_path, angle=90):
    """Write a rotated copy of every ``<class>_<index>`` label file in *path*.

    Each row ``class x y w h`` has its centre rotated with ``rotation`` and,
    for quarter turns (90/270), its width and height exchanged. The result is
    saved next to the source as ``<name>_rot<angle>.txt``. Only files whose
    name splits into exactly two parts on ``'_'`` are processed, so files
    produced by an earlier call are skipped.

    Args:
        path: Directory containing the label files.
        angle: Clockwise rotation in degrees.
    """
    # Width and height only trade places on odd multiples of 90 degrees.
    swap_wh = angle % 180 == 90
    for lab in tqdm(os.listdir(path)):
        if len(lab.split('_')) != 2:
            continue
        save_name = lab.split('.')[0]
        write_label = []
        with open(os.path.join(path, lab), 'r') as f:
            for row in f:
                temp = row.split()
                if len(temp) < 5:
                    # Blank or malformed line: nothing to rotate.
                    continue
                temp[1], temp[2] = rotation(float(temp[1]), float(temp[2]), angle)
                if swap_wh:
                    temp[3], temp[4] = switch_p(temp[3], temp[4])
                write_label.append(' '.join(temp))
        with open(os.path.join(path, "{}_rot{}.txt".format(save_name, angle)), 'w') as nf:
            nf.writelines(line + '\n' for line in write_label)
        print("{}_rot{}.txt,done".format(save_name, angle))
# Placeholder for the dataset root directory; replace with a real path.
root_path = '数据集根目录'
# Directory holding the (renamed and rotated) images.
image_path = os.path.join(root_path, "images")
# `num` is the number of images in one class and `weights` the train/val
# split. As a shortcut every class is assumed to hold 50 images; the real
# count per class could be obtained with find_same_class instead.
def random_id(num=50, weights=(0.8, 0.2)):
    """Randomly split the indices ``0 .. num-1`` into train and val parts.

    Args:
        num: Number of images in the class.
        weights: ``(train_fraction, val_fraction)``; only the first entry is
            used, the validation part receives every remaining index.

    Returns:
        ``(train_ids, val_ids)``: *train_ids* in random order holding
        ``ceil(num * weights[0])`` indices (capped to ``0 .. num``), and
        *val_ids* in ascending order.
    """
    import math

    # round() first, so float noise such as 10 * 0.7 == 7.000000000000001
    # does not push the count up by one.
    train_count = math.ceil(round(num * weights[0], 9))
    train_count = max(0, min(num, train_count))
    num_list = random.sample(range(num), train_count)
    chosen = set(num_list)
    val_list = [i for i in range(num) if i not in chosen]
    return num_list, val_list
# Build a class-balanced train/val split from the renamed image files.
def random_split(path=image_path, weights=(0.8, 0.2), num_classes=50, num_per_class=50):
    """Write ``train.txt`` and ``val.txt`` under ``root_path``.

    For every class the image indices are split with ``random_id``; each
    selected image is listed together with its three rotated copies, so an
    image and its rotations always end up in the same subset.

    Args:
        path: Accepted for backward compatibility; not used by this function.
        weights: ``(train_fraction, val_fraction)`` forwarded to ``random_id``.
        num_classes: Number of classes (file-name prefix ``0 .. num_classes-1``).
        num_per_class: Number of images assumed to exist for each class.
    """
    # The original image plus the copies produced by the rotation helpers.
    suffixes = ("", "_rot90", "_rot180", "_rot270")
    train_list = []
    val_list = []
    for i in tqdm(range(num_classes)):
        t, v = random_id(num_per_class, weights)
        train_list.extend("{}_{}{}.jpg".format(i, k, s) for k in t for s in suffixes)
        val_list.extend("{}_{}{}.jpg".format(i, m, s) for m in v for s in suffixes)
    for txt_name, names in (('train.txt', train_list), ('val.txt', val_list)):
        with open(os.path.join(root_path, txt_name), 'w') as f:
            # One image path per line.
            f.writelines('./images/{}\n'.format(name) for name in names)
train: E:\Document\tianchi\dataset\ali_train\train\train.txt # train images
val: E:\Document\tianchi\dataset\ali_train\train\val.txt # val images
nc: 50 # number of classes; without the id remapping (ids starting at 1) use nc=51 and put an empty class '' in front of names
names: ['冰墩墩', 'Sanyo/三洋', 'Eifini/伊芙丽', 'PSALTER/诗篇', 'Beaster', 'ON/昂跑', 'BYREDO/柏芮朵',
'Ubras', 'Eternelle', 'PERFECT DIARY/完美日记', '花西子', 'Clarins/娇韵诗', "L'occitane/欧舒丹",
'Versace/范思哲', 'Mizuno/美津浓', 'Lining/李宁', 'DOUBLE STAR/双星', 'YONEX/尤尼克斯', 'Tory Burch/汤丽柏琦',
'Gucci/古驰', 'Louis Vuitton/路易威登', 'CARTELO/卡帝乐鳄鱼', 'JORDAN', 'KENZO', 'UNDEFEATED',
'BOY LONDON', 'TREYO/雀友', 'carhartt', '洁柔', 'Blancpain/宝珀', 'GXG', '乐町', 'Diadora/迪亚多纳',
'TUCANO/啄木鸟', 'Loewe', 'Granite Gear', 'DESCENTE/迪桑特', 'OSPREY', 'Swatch/斯沃琪', 'erke/鸿星尔克',
'Massimo Dutti', 'PINKO', 'PALLADIUM', 'origins/悦木之源', 'Trendiano', '音儿', 'Monster Guardians',
'敷尔佳', 'IPSA/茵芙莎', 'Schwarzkopf/施华蔻'] # class names
# Placeholder: path of the test-set json file.
json_path = 'json文件路径'
# Placeholder: root directory of the test-set images.
img_path = '测试集图片根目录'
if __name__ == '__main__':
    json_file = json_path
    with open(json_file, 'r', encoding='utf-8') as f_json:
        data = json.load(f_json)
    # Map each image id to its file name as listed in the json.
    id_map = {image['id']: image['file_name'] for image in data['images']}
    # Rename every image to "<image id>.jpg".
    for image_id, file_name in id_map.items():
        # Separate source/target variables: `img_path` must keep pointing at
        # the image root directory for every iteration.
        src_path = os.path.join(img_path, file_name)
        output_path = os.path.join(img_path, "{}.jpg".format(image_id))
        os.rename(src_path, output_path)
def save_one_json(predn, jdict, path, class_map):
    """Append the detections of one image to *jdict* as result dicts.

    Each entry looks like
    ``{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}``.

    Args:
        predn: Detections, one row per box; columns 0-3 are read as the box,
            column 4 as the score and column 5 as the class index.
        jdict: List that receives the result dicts (modified in place).
        path: Image path; its stem becomes the image id (as an int when the
            stem is numeric).
        class_map: Lookup from predicted class index to class id.
    """
    image_id = int(path.stem) if path.stem.isnumeric() else path.stem
    box = xyxy2xywh(predn[:, :4])  # xywh
    box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
    for p, b in zip(predn.tolist(), box.tolist()):
        jdict.append({
            'image_id': image_id,
            # Training labels were remapped to 0-49 but the expected result
            # ids are 1-50, so 1 is added to the mapped id. Adding it after
            # the lookup keeps the index inside class_map for the last class.
            'category_id': class_map[int(p[5])] + 1,
            'bbox': [round(x, 3) for x in b],
            'score': round(p[4], 5),
        })