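# Reference: 【LogoDetection 数据集处理】(5)数据集格式转换:labelme格式转为coco格式
# (LogoDetection dataset processing, part 5: dataset format conversion, labelme to COCO)
# COCO-format dataset: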
"""
将coco格式的数据集转为labelme格式:
1、按照类别划分文件夹
2、图像文件重命名
3、在图中画出GT框验证转换后的labelme格式是否正确
"""""
import json
import os
import shutil
import cv2
import numpy as np
from tqdm import tqdm
# 将图片按照类别分为50个文件夹
def split_classes(trainset_dir):
# 创建最外层文件夹
isExists_trainset_dir = os.path.exists(trainset_dir)
if not isExists_trainset_dir:
os.mkdir(trainset_dir)
# 创建50个子文件夹
class_dict = {}
for i in range(len(categories_list)):
dir_name = categories_list[i]["name"]
dir_name_path = os.path.join(trainset_dir, dir_name)
class_dict[i + 1] = dir_name_path
isExists = os.path.exists(dir_name_path)
if isExists:
shutil.rmtree(dir_name_path)
os.makedirs(dir_name_path, exist_ok=True)
# 移动图片到对应类别的文件夹
images_name = os.listdir(coco_images_path)
for image_name in tqdm(images_name, desc="split 50 classes process"):
image_path = os.path.join(coco_images_path, image_name)
for i in range(len(images_list)):
if image_name == images_list[i]["file_name"]:
image_id = images_list[i]["id"]
for j in range(len(annotations_list)):
if image_id == annotations_list[j]["image_id"]:
cls_id = annotations_list[j]["category_id"]
pic_file_path = class_dict[cls_id]
shutil.copy(image_path, pic_file_path)
def coco_2_labelme(trainset_dir, visual=False):
trainset_dir_list = os.listdir(trainset_dir)
for subdir_name in tqdm(trainset_dir_list, desc="coco_2_labelme process"):
subdir_path = os.path.join(trainset_dir, subdir_name)
pic_num = 0
pic_names = os.listdir(subdir_path)
for pic_name in pic_names:
pic_path = os.path.join(subdir_path, pic_name)
ext = pic_name.split('.')[-1]
labelme_pic_name = subdir_name + "_" + str(pic_num) + "." + ext
pic_num += 1
labelme_pic_path = os.path.join(subdir_path, labelme_pic_name)
labelme_dict = {}
labelme_dict["version"] = "5.0.0"
labelme_dict["flags"] = {}
labelme_dict["imagePath"] = labelme_pic_name
labelme_dict["imageData"] = ""
labelme_dict["shapes"] = []
for images_list_dict in images_list:
if images_list_dict["file_name"] == pic_name:
labelme_dict["imageHeight"] = images_list_dict["height"]
labelme_dict["imageWidth"] = images_list_dict["width"]
image_id = images_list_dict["id"]
for annotations_list_dict in annotations_list:
if annotations_list_dict["image_id"] == image_id:
shapes_dict = {}
shapes_dict["label"] = subdir_name
shapes_dict["group_id"] = 'null'
shapes_dict["shape_type"] = 'rectangle'
shapes_dict["flags"] = {}
shapes_dict["points"] = [[], []]
# coco的bbox用的是x0,y0,w,h labelme的bbox用的是x0,y0,x1,y1
x0 = annotations_list_dict["bbox"][0]
y0 = annotations_list_dict["bbox"][1]
w = annotations_list_dict["bbox"][2]
h = annotations_list_dict["bbox"][3]
shapes_dict["points"][0].append(x0)
shapes_dict["points"][0].append(y0)
shapes_dict["points"][1].append(x0 + w)
shapes_dict["points"][1].append(y0 + h)
labelme_dict["shapes"].append(shapes_dict)
if visual:
cls_id = annotations_list_dict["category_id"]
pic_visual(pic_path, shapes_dict["points"], cls_id, categories_list)
# 写入labelme格式的json文件
labelme_json_name = labelme_pic_name.split('.')[0] + ".json"
labelme_json_path = os.path.join(subdir_path, labelme_json_name)
with open(labelme_json_path, 'w', encoding='utf-8') as f:
json.dump(labelme_dict, f, indent=4, ensure_ascii=False)
# 重命名图片
os.rename(pic_path, labelme_pic_path)
def pic_visual(img_path, bbox, cls_id, class_names):
from categories_list import COLORS
img = cv2.imread(img_path)
color = (COLORS[cls_id] * 255).astype(np.uint8).tolist()
text = '{}'.format(class_names[cls_id - 1])
txt_color = (0, 0, 0) if np.mean(COLORS[cls_id]) > 0.5 else (255, 255, 255)
font = cv2.FONT_HERSHEY_SIMPLEX
txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]
txt_bk_color = (COLORS[cls_id] * 255 * 0.7).astype(np.uint8).tolist()
x_min = bbox[0][0]
y_min = bbox[0][1]
x_max = bbox[1][0]
y_max = bbox[1][1]
cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color, thickness=2)
cv2.rectangle(img, (x_min, y_min - int(1.5 * txt_size[1])), (x_min + txt_size[0] + 1, y_min), txt_bk_color, -1)
cv2.putText(img, text, (x_min, y_min - 3), font, 0.4, txt_color, thickness=1)
cv2.imwrite(img_path, img)
if __name__ == '__main__':
# 类别目录
from categories_list import categories_list_round1_train, categories_list_round2_train
categories_list = categories_list_round1_train
# coco格式数据文件
coco_root_path = "dataset/fewshotlogodetection_round1_train_202204/train"
coco_images_path = os.path.join(coco_root_path, "images")
coco_annotations_path = os.path.join(coco_root_path, "annotations/instances_train2017.json")
with open(coco_annotations_path, 'r', encoding='utf-8') as f:
annoations_dict = json.load(f)
images_list = annoations_dict["images"]
annotations_list = annoations_dict["annotations"]
# 按照类别划分50个文件夹
dir = "TrainSet_50Classes_round1"
split_classes(dir)
if_visual = False
coco_2_labelme(dir, visual=if_visual)