最近一直在从事图像语义分割方面的工作,在此记录一下语义分割数据集的制作,这里使用的是labelme来进行标注,标注过程就不详细说明了,主要讲一下标注完成后的数据转化过程。
一共需要1个文件夹和3个文件:img_json、labels.txt、json2voc.py和split_train_val.py
- img_json
这个文件夹里存放了标注好的json文件与对应的图像文件。
- labels.txt
这个文件需要自己手动创建,每行写一个类别名,内容如下:
__ignore__
_background_
类别1
类别2
其中第一行必须是 __ignore__,第二行必须是 _background_(脚本会对这两行做检查),后面的类别按自己的项目填写。
上面两样东西准备好后,依次执行下面的两个py脚本就行。
- json2voc.py
用于将json文件转化到VOC格式
#!/usr/bin/env python
#encoding=gbk
from __future__ import print_function
import argparse
import glob
import os
import os.path as osp
import sys
import imgviz
import numpy as np
import labelme
def _load_class_names(labels_path):
    """Parse the global labels file into class names and a name-to-id map.

    Line ``i`` of the file (0-based) receives id ``i - 1``, so the first line
    maps to -1 and the second to 0.

    Args:
        labels_path: path of the UTF-8 text file with one class name per line.

    Returns:
        A ``(class_names, class_name_to_id)`` pair: a tuple of every name
        whose id is >= 0, and a dict holding every name (the -1 entry
        included).

    Raises:
        AssertionError: if the first line is not ``__ignore__`` or the second
            line is not ``_background_``.
    """
    class_names = []
    class_name_to_id = {}
    # The context manager closes the handle; the bare open(...).readlines()
    # used before left it to the garbage collector.
    with open(labels_path, encoding="utf-8") as f:
        for i, line in enumerate(f):
            class_id = i - 1  # starts with -1
            class_name = line.strip()
            class_name_to_id[class_name] = class_id
            # Raised explicitly (same exception type as the former ``assert``)
            # so the checks still run under ``python -O``.
            if class_id == -1:
                if class_name != "__ignore__":
                    raise AssertionError(
                        "first label must be __ignore__, got %r" % class_name
                    )
                continue
            if class_id == 0 and class_name != "_background_":
                raise AssertionError(
                    "second label must be _background_, got %r" % class_name
                )
            class_names.append(class_name)
    return tuple(class_names), class_name_to_id


def main(args):
    """Convert labelme JSON annotations into a VOC-style segmentation dataset.

    Creates ``args.output_dir`` with the sub-folders ``JPEGImages``,
    ``SegmentationClassnpy``, ``SegmentationClass`` and, unless ``args.noviz``
    is set, ``SegmentationClassVisualization``; writes ``class_names.txt``;
    then processes every ``*.json`` file found in ``args.input_dir``.

    Args:
        args: namespace with the attributes ``input_dir``, ``output_dir``,
            ``labels`` and ``noviz`` (as produced by ``get_args``).

    Raises:
        SystemExit: with status 1 if ``args.output_dir`` already exists.
        AssertionError: if the labels file does not start with
            ``__ignore__`` followed by ``_background_``.
    """
    if osp.exists(args.output_dir):
        print("文件夹已存在,请先删除:", args.output_dir)
        sys.exit(1)

    sub_dirs = ["JPEGImages", "SegmentationClassnpy", "SegmentationClass"]
    if not args.noviz:
        sub_dirs.append("SegmentationClassVisualization")
    os.makedirs(args.output_dir)
    for sub_dir in sub_dirs:
        os.makedirs(osp.join(args.output_dir, sub_dir))
    print("Creating dataset:", args.output_dir)

    class_names, class_name_to_id = _load_class_names(args.labels)
    print("class_names:", class_names)

    out_class_names_file = osp.join(args.output_dir, "class_names.txt")
    # Written as UTF-8 to match how the labels file is read, so non-ASCII
    # class names survive regardless of the platform default encoding.
    with open(out_class_names_file, "w", encoding="utf-8") as f:
        f.write("\n".join(class_names))
    print("Saved class_names:", out_class_names_file)

    for filename in glob.glob(osp.join(args.input_dir, "*.json")):
        print("Generating dataset from:", filename)

        label_file = labelme.LabelFile(filename=filename)

        # All outputs of one annotation share the JSON file's base name.
        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, "JPEGImages", base + ".jpg")
        out_lbl_file = osp.join(
            args.output_dir, "SegmentationClassnpy", base + ".npy"
        )
        out_png_file = osp.join(
            args.output_dir, "SegmentationClass", base + ".png"
        )

        # The image bytes stored in the label file are written out unchanged.
        with open(out_img_file, "wb") as f:
            f.write(label_file.imageData)
        img = labelme.utils.img_data_to_arr(label_file.imageData)

        lbl, _ = labelme.utils.shapes_to_label(
            img_shape=img.shape,
            shapes=label_file.shapes,
            label_name_to_value=class_name_to_id,
        )
        labelme.utils.lblsave(out_png_file, lbl)
        np.save(out_lbl_file, lbl)

        if not args.noviz:
            out_viz_file = osp.join(
                args.output_dir,
                "SegmentationClassVisualization",
                base + ".jpg",
            )
            # The labels are drawn over the image as loaded; an earlier
            # variant passed imgviz.rgb2gray(img) here instead.
            viz = imgviz.label2rgb(
                label=lbl,
                image=img,
                font_size=15,
                label_names=class_names,
                loc="rb",
            )
            imgviz.io.imsave(out_viz_file, viz)
def get_args(argv=None):
    """Parse the command-line options of the json-to-VOC converter.

    Args:
        argv: optional list of argument strings. When ``None`` (the default)
            argparse reads the process command line, which is what the
            original zero-argument call did.

    Returns:
        The parsed namespace with ``input_dir``, ``output_dir``, ``labels``
        and ``noviz`` attributes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--input_dir", default="./img_json", help="存放了图像和json的文件夹"
    )
    # The help text previously had mismatched quotes (''…"), which made the
    # whole script a syntax error.
    parser.add_argument("--output_dir", default="./out", help="会自动创建")
    parser.add_argument("--labels", default="./labels.txt", help="全局标签文件")
    parser.add_argument("--noviz", help="no visualization", action="store_true")
    return parser.parse_args(argv)
if __name__ == "__main__":
    # Script entry point: parse the command line, then run the conversion.
    main(get_args())
- split_train_val.py
拆分数据集
#encoding=gbk
from sklearn.model_selection import train_test_split
import os
# Dataset root and the folders derived from it: images are read from
# JPEGImages and the split lists are written to ImageSets/Segmentation.
rootpath = r'./out'
imagedir = os.path.join(rootpath, 'JPEGImages')
pathout = os.path.join(rootpath, 'ImageSets', 'Segmentation')
os.makedirs(pathout, exist_ok=True)

# splitext drops only the final extension, so a name such as "scan.v2.jpg"
# stays "scan.v2" (split('.')[0] cut it down to "scan").
# os.listdir returns entries in arbitrary order; sorting makes the seeded
# split below reproducible across machines and runs.
images = sorted(os.path.splitext(file)[0] for file in os.listdir(imagedir))

# 90% of the samples go to train; the remaining 10% is halved
# (0.1 / 0.2 == 0.5) into val and test, i.e. roughly 90/5/5 overall.
# NOTE(review): the 0.1 / 0.2 expression suggests an 80/10/10 split may have
# been intended at some point - confirm the desired ratios.
# NOTE(review): with very few images the hold-out part may be too small for
# the second split - confirm against the dataset size.
train, test = train_test_split(images, train_size=0.9, random_state=0)
val, test = train_test_split(test, train_size=0.1 / 0.2, random_state=0)

# One base name per line, no trailing newline.
for split_name, names in (("train", train), ("val", val), ("test", test)):
    with open(os.path.join(pathout, split_name + ".txt"), 'w') as f:
        f.write('\n'.join(names))
最终会在out文件夹下得到如下内容:
前三个文件是实际上模型训练需要的。
还有一点需要注意:SegmentationClass文件夹下的图像是索引图,本身是单通道的(每个像素的值就是类别编号),只是在图片查看器里显示成彩色。可以用下面的代码验证:
from PIL import Image
import numpy as np
import cv2
# Read the same label image with PIL and with OpenCV and print the array
# shape each reader produces.
img = np.array(Image.open('1.png'))
print(img.shape)

img2 = cv2.imread('1.png')
print(img2.shape)
其中1.png是SegmentationClass文件夹下的图像。运行上述代码可以发现,两者的输出结果并不相同:PIL读出来的是单通道,而cv2.imread默认会把图像转换成3通道的彩色图,直接打印像素值的话两者也不同。所以我们以PIL的结果为准,因为:
图像属性显示位深度是8而不是24。