标注软件常用的也就LabelImg和Labelme,分别用于目标检测与分割的标注。但是吧,咱不能被工具限制死,公开的是是分割,咱可以转换为检测需要的格式啊。本文从两个角度达到目的,一是通过labelme的json格式,二是直接通过二值mask
JPEGImages夹下就是原始图片,Annotions文件夹中是xml格式的标注文件,Main下就是数据集切分的训练验证测试的txt存储对应的文件名
先看下咱的目标labelImg的xml格式:
<annotation>
<folder>WH_datafolder>
<filename>1552.jpgfilename>
<source>
<database>WH Datadatabase>
<annotation>WHannotation>
<image>flickrimage>
<flickrid>NULLflickrid>
source>
<owner>
<flickrid>NULLflickrid>
<name>WHname>
owner>
<size>
<width>1920width>
<height>1080height>
<depth>3depth>
size>
<segmented>0segmented>
<object>
<name>tankname>
<pose>Unspecifiedpose>
<truncated>1truncated>
<difficult>0difficult>
<bndbox>
<xmin>1213xmin>
<ymin>921ymin>
<xmax>1392xmax>
<ymax>1004ymax>
bndbox>
object>
annotation>
下面我们看一下如何转换吧
数据源labelme的json格式:
{
"version": "4.5.13",
"flags": {},
"shapes": [
{
"label": "tank",
"points": [
[
1219.6581196581196,
921.3675213675215
],
[
1298.2905982905984,
931.1965811965813
],
[
1337.6068376068376,
939.3162393162394
],
[
1342.3076923076924,
947.0085470085471
],
[
1342.7350427350427,
962.8205128205129
],
[
1389.3162393162395,
971.3675213675215
],
[
1392.3076923076924,
974.7863247863248
],
[
1392.3076923076924,
977.7777777777778
],
[
1344.871794871795,
970.5128205128206
],
[
1342.7350427350427,
1003.8461538461539
],
[
1336.7521367521367,
1004.2735042735044
],
[
1226.923076923077,
985.8974358974359
],
[
1213.2478632478633,
978.6324786324786
]
],
"group_id": null,
"shape_type": "polygon",
"flags": {}
}
],
"imagePath": "1552.jpg",
"imageData": null,
"imageHeight": 1080,
"imageWidth": 1920
}
转换json格式为矩形的xml,然后拷贝原始图片至JPEGImages
labelme2Xml.py # ps:json文件和原始图片在同一个文件夹下
# -*- coding: utf-8 -*-
import os
from typing import List, Any
import numpy as np
import codecs
import json
from glob import glob
import cv2
import shutil
from sklearn.model_selection import train_test_split
# 1.标签路径
labelme_path = r"C:\\Users\\fei\Downloads\\json2xml\\tank_zhu\\"
#原始labelme标注数据路径
saved_path = "VOC2007/"
# 保存路径
isUseTest=True#是否创建test集
# 2.创建要求文件夹
if not os.path.exists(saved_path + "Annotations"):
os.makedirs(saved_path + "Annotations")
if not os.path.exists(saved_path + "JPEGImages/"):
os.makedirs(saved_path + "JPEGImages/")
if not os.path.exists(saved_path + "ImageSets/Main/"):
os.makedirs(saved_path + "ImageSets/Main/")
# 3.获取待处理文件
files = glob(labelme_path + "*.json")
files = [i.replace("\\","/").split("/")[-1].split(".json")[0] for i in files]
print(files)
# 4.读取标注信息并写入 xml
for json_file_ in files:
json_filename = labelme_path + json_file_ + ".json"
json_file = json.load(open(json_filename, "r", encoding="utf-8"))
height, width, channels = cv2.imread(labelme_path + json_file_ + ".jpg").shape
with codecs.open(saved_path + "Annotations/" + json_file_ + ".xml", "w", "utf-8") as xml:
xml.write('\n' )
xml.write('\t' + 'WH_data' + '\n')
xml.write('\t' + json_file_ + ".jpg" + '\n')
xml.write('\t\n' )
xml.write('\t\tWH Data \n')
xml.write('\t\tWH \n')
xml.write('\t\t
flickr\n')
xml.write('\t\tNULL \n')
xml.write('\t\n')
xml.write('\t\n' )
xml.write('\t\tNULL \n')
xml.write('\t\tWH \n')
xml.write('\t\n')
xml.write('\t\n' )
xml.write('\t\t' + str(width) + '\n')
xml.write('\t\t' + str(height) + '\n')
xml.write('\t\t' + str(channels) + '\n')
xml.write('\t\n')
xml.write('\t\t0 \n')
for multi in json_file["shapes"]:
points = np.array(multi["points"])
labelName=multi["label"]
xmin = min(points[:, 0])
xmax = max(points[:, 0])
ymin = min(points[:, 1])
ymax = max(points[:, 1])
label = multi["label"]
if xmax <= xmin:
pass
elif ymax <= ymin:
pass
else:
xml.write('\t\n')
print(json_filename, xmin, ymin, xmax, ymax, label)
xml.write('')
# 5.复制图片到 VOC2007/JPEGImages/下
image_files = glob(labelme_path + "*.jpg")
print("copy image files to VOC007/JPEGImages/")
for image in image_files:
shutil.copy(image, saved_path + "JPEGImages/")
splitData_labelImg.py
import os
import random
trainval_percent = 1.0 # 训练验证样本占所有样本的比重
train_percent = 0.8 # 训练样本占训练验证样本的比重
xmlfilepath = 'VOC2007/Annotations'
txtsavepath = 'VOC2007/ImageSets/Main'
total_xml = os.listdir(xmlfilepath)
num=len(total_xml)
list=range(num)
tv=int(num*trainval_percent)
tr=int(tv*train_percent)
trainval= random.sample(list,tv)
train=random.sample(trainval,tr)
ftrainval = open('VOC2007/ImageSets/Main/trainval.txt', 'w') # 'a'为append
ftest = open('VOC2007/ImageSets/Main/test.txt', 'w')
ftrain = open('VOC2007/ImageSets/Main/train.txt', 'w')
fval = open('VOC2007/ImageSets/Main/val.txt', 'w')
for i in list:
if total_xml[i].endswith(".xml"):
print(total_xml[i])
name=total_xml[i][:-4]+'\n'
if i in trainval:
ftrainval.write(name)
if i in train:
ftrain.write(name)
else:
fval.write(name)
else:
ftest.write(name)
ftrainval.close()
ftrain.close()
fval.close()
ftest.close()
mask2Xml.py
#!/usr/bin/env python3
from genericpath import exists
import os
import re
import fnmatch
from PIL import Image
import numpy as np
from pycococreatortools import pycococreatortools
from pycocotools import mask
from PIL import Image
import codecs
from glob import glob
import shutil
part = "train" # train test
ROOT_DIR = 'C:/Users/awei/Desktop/mask2Labelme/COD10K/'+part+'/'
IMAGE_DIR = os.path.join(ROOT_DIR, "Image")
ANNOTATION_DIR = os.path.join(ROOT_DIR, "GT")
def filter_for_jpeg(root, files):
file_types = ['*.jpeg', '*.jpg', '*.png']
file_types = r'|'.join([fnmatch.translate(x) for x in file_types])
files = [os.path.join(root, f) for f in files]
files = [f for f in files if re.match(file_types, f)]
return files
def filter_for_annotations(root, files, image_filename):
file_types = ['*.png']
file_types = r'|'.join([fnmatch.translate(x) for x in file_types])
basename_no_extension = os.path.splitext(os.path.basename(image_filename))[0]
file_name_prefix = basename_no_extension + '_.*' # 用于匹配对应的二值mask
files = [os.path.join(root, f) for f in files]
files = [f for f in files if re.match(file_types, f)]
files = [f for f in files if re.match(file_name_prefix, os.path.splitext(os.path.basename(f))[0])]
return files
saved_path = "VOC2007/"
# 2.创建要求文件夹
if not os.path.exists(saved_path + "Annotations"):
os.makedirs(saved_path + "Annotations")
if not os.path.exists(saved_path + "JPEGImages/"):
os.makedirs(saved_path + "JPEGImages/")
if not os.path.exists(saved_path + "ImageSets/Main/"):
os.makedirs(saved_path + "ImageSets/Main/")
ftrainval = open('VOC2007/ImageSets/Main/trainval.txt', 'a') # 'a'为append
ftest = open('VOC2007/ImageSets/Main/test.txt', 'w')
ftrain = open('VOC2007/ImageSets/Main/train.txt', 'w')
fval = open('VOC2007/ImageSets/Main/val.txt', 'w')
def splitData():
for root, _, files in os.walk(IMAGE_DIR):
image_files = filter_for_jpeg(root, files)
# go through each image
for image_filename in image_files:
if not os.path.exists(saved_path+ "JPEGImages/"+os.path.basename(image_filename)):
shutil.copy(image_filename, saved_path + "JPEGImages/")
name = os.path.basename(image_filename).split('.')[0]
if not os.path.exists(saved_path + "Annotations/"+name+".xml"):
print("not exist:"+name+".xml")
continue
if part=="train":
ftrain.write(name)
ftrain.write('\n')
else:
fval.write(name)
fval.write('\n')
ftrainval.write(name)
ftrainval.write('\n')
ftrainval.close()
ftrain.close()
fval.close()
ftest.close()
def main():
# filter for jpeg images
for root, _, files in os.walk(IMAGE_DIR):
image_files = filter_for_jpeg(root, files)
# go through each image
for image_filename in image_files:
image = Image.open(image_filename)
width,height,channels = image.size[0],image.size[1],image.layers
boxList = []
labels=[]
# filter for associated png annotations
for root, _, files in os.walk(ANNOTATION_DIR):
annotation_files = filter_for_annotations(root, files, image_filename)
# go through each associated annotation
for annotation_filename in annotation_files:
print(annotation_filename)
name = os.path.basename(annotation_filename)
image_name = name.split('_')[-3]
label = name.split('_')[1] #
binary_mask = np.asarray(Image.open(annotation_filename)
.convert('1')).astype(np.uint8)
if image is not None:
binary_mask = pycococreatortools.resize_binary_mask(binary_mask, image.size)
binary_mask_encoded = mask.encode(np.asfortranarray(binary_mask.astype(np.uint8)))
bounding_box = mask.toBbox(binary_mask_encoded)
boxList.append([bounding_box[0],bounding_box[1],bounding_box[0]+bounding_box[2],bounding_box[1]+bounding_box[3]])
labels.append(label)
# 读取标注信息并写入 xml
with codecs.open(saved_path + "Annotations/" + image_name + ".xml", "w", "utf-8") as xml:
xml.write('\n' )
xml.write('\t' + 'WH_data' + '\n')
xml.write('\t' + image_name + ".jpg" + '\n')
xml.write('\t\n' )
xml.write('\t\tWH Data \n')
xml.write('\t\tWH \n')
xml.write('\t\t
flickr\n')
xml.write('\t\tNULL \n')
xml.write('\t\n')
xml.write('\t\n' )
xml.write('\t\tNULL \n')
xml.write('\t\tWH \n')
xml.write('\t\n')
xml.write('\t\n' )
xml.write('\t\t' + str(width) + '\n')
xml.write('\t\t' + str(height) + '\n')
xml.write('\t\t' + str(channels) + '\n')
xml.write('\t\n')
xml.write('\t\t0 \n')
for box,label in zip(boxList,labels):
xmin, ymin, xmax,ymax = box
if xmax <= xmin:
pass
elif ymax <= ymin:
pass
else:
xml.write('\t\n')
print(image_filename, xmin, ymin, xmax, ymax, label)
xml.write('')
if __name__ == "__main__":
main() # 用于将mask生成xml
splitData() # 用于切分数据(适用非随机的切分)