使用labelme交互界面保存标注结果,会得到.json
文件,其中保存了标注label信息。而对于图像分割任务,需要其对应的标注是.png
/.bmp
等格式的图像文件。
单个json文件的转化可通过以下步骤实现:
(1) 打开cmd
激活labelme
环境
(2) 运行labelme_json_to_dataset <文件名>.json
而实际中我们希望能对文件夹下多个json文件进行批量处理。这时我们需要将Anaconda所在路径\envs\Lib\site-packages\labelme\cli下的json_to_dataset.py文件
进行修改。参考博客中的代码对于module的调用存在问题,通过修改module调用关系,我们将代码写成了以下内容(注意修改之后visualize中的label颜色并不正确)。
import argparse
import json
import os
import os.path as osp
import warnings
import copy
import numpy as np
import PIL.Image
from skimage import io
import yaml
from labelme import utils
def main():
parser = argparse.ArgumentParser()
parser.add_argument('json_file')
parser.add_argument('-o', '--out', default=None)
args = parser.parse_args()
json_file = args.json_file
list = os.listdir(json_file)
for i in range(0, len(list)):
path = os.path.join(json_file, list[i])
filename = list[i][:-5] # .json
if os.path.isfile(path):
data = json.load(open(path))
img = utils.image.img_b64_to_arr(data['imageData'])
lbl, lbl_names = utils.shape.labelme_shapes_to_label(img.shape, data['shapes']) # labelme_shapes_to_label
captions = ['%d: %s' % (l, name) for l, name in enumerate(lbl_names)]
lbl_viz = utils.draw.draw_label(lbl, img, captions)
out_dir = osp.basename(list[i]).replace('.', '_')
out_dir = osp.join(osp.dirname(list[i]), out_dir)
if not osp.exists(out_dir):
os.mkdir(out_dir)
PIL.Image.fromarray(img).save(osp.join(out_dir, '{}.png'.format(filename)))
PIL.Image.fromarray(lbl).save(osp.join(out_dir, '{}_gt.png'.format(filename)))
PIL.Image.fromarray(lbl_viz).save(osp.join(out_dir, '{}_viz.png'.format(filename)))
with open(osp.join(out_dir, 'label_names.txt'), 'w') as f:
for lbl_name in lbl_names:
f.write(lbl_name + '\n')
warnings.warn('info.yaml is being replaced by label_names.txt')
info = dict(label_names=lbl_names)
with open(osp.join(out_dir, 'info.yaml'), 'w') as f:
yaml.safe_dump(info, f, default_flow_style=False)
print('Saved to: %s' % out_dir)
if __name__ == '__main__':
main()
以上代码可以将json文件中的label存储为png
图像文件。但是存在一个问题:对于多类分割任务,任意一张图可能不包含所有分类。因此整个文件夹下生成的所有label图像中,不同图像中的相同类别的目标在label.png
中可能对应不同的灰度值,使标注的label不具备统一性,因而出错。
为了解决该问题,将代码改为以下形式:
import argparse
import json
import os
import os.path as osp
import warnings
import copy
import numpy as np
import PIL.Image
from skimage import io
import yaml
from labelme import utils
NAME_LABEL_MAP = {
'_background_': 0,
"baseball_diamond": 1,
"tennis_court": 2,
"basketball_court": 3,
"ground_track_field": 4,
}
LABEL_NAME_MAP = {
0: '_background_',
1: "airplane",
2: "ship",
3: "storage_tank",
4: "baseball_diamond",
5: "tennis_court",
6: "basketball_court",
7: "ground_track_field",
8: "harbor",
9: "bridge",
10: "vehicle",
}
def main():
parser = argparse.ArgumentParser()
parser.add_argument('json_file')
parser.add_argument('-o', '--out', default=None)
args = parser.parse_args()
json_file = args.json_file
list = os.listdir(json_file)
for i in range(0, len(list)):
path = os.path.join(json_file, list[i])
filename = list[i][:-5] # .json
if os.path.isfile(path):
data = json.load(open(path))
img = utils.image.img_b64_to_arr(data['imageData'])
lbl, lbl_names = utils.shape.labelme_shapes_to_label(img.shape, data['shapes']) # labelme_shapes_to_label
# modify labels according to NAME_LABEL_MAP
lbl_tmp = copy.copy(lbl)
for key_name in lbl_names:
old_lbl_val = lbl_names[key_name]
new_lbl_val = NAME_LABEL_MAP[key_name]
lbl_tmp[lbl == old_lbl_val] = new_lbl_val
lbl_names_tmp = {}
for key_name in lbl_names:
lbl_names_tmp[key_name] = NAME_LABEL_MAP[key_name]
# Assign the new label to lbl and lbl_names dict
lbl = np.array(lbl_tmp, dtype=np.int8)
lbl_names = lbl_names_tmp
captions = ['%d: %s' % (l, name) for l, name in enumerate(lbl_names)]
lbl_viz = utils.draw.draw_label(lbl, img, captions)
out_dir = osp.basename(list[i]).replace('.', '_')
out_dir = osp.join(osp.dirname(list[i]), out_dir)
if not osp.exists(out_dir):
os.mkdir(out_dir)
PIL.Image.fromarray(img).save(osp.join(out_dir, '{}.png'.format(filename)))
PIL.Image.fromarray(lbl).save(osp.join(out_dir, '{}_gt.png'.format(filename)))
PIL.Image.fromarray(lbl_viz).save(osp.join(out_dir, '{}_viz.png'.format(filename)))
with open(osp.join(out_dir, 'label_names.txt'), 'w') as f:
for lbl_name in lbl_names:
f.write(lbl_name + '\n')
warnings.warn('info.yaml is being replaced by label_names.txt')
info = dict(label_names=lbl_names)
with open(osp.join(out_dir, 'info.yaml'), 'w') as f:
yaml.safe_dump(info, f, default_flow_style=False)
print('Saved to: %s' % out_dir)
if __name__ == '__main__':
main()
通过建立全局标签值字典,可以控制label图像中目标对应相同的灰度值,从而保证标签在所有图像中的一致性。同时我们修改了图像存储时的命名,使其命名与原图命名对应。
批量处理的步骤如下:
打开cmd,激活labelme
环境,同时进入到文件保存路径下;
输入命令如下:
labelme_json_to_dataset E:\json
可以实现批量激活。其中E:\json
是待批量处理的json文件所在的文件夹目录。而批量处理后的文件将存储在cmd环境下进入的当前目录下。
labelme_json_dataset生成的标注图像文件是每个json对应一个文件夹,文件多了手动一张张的复制劳动量就很大,于是写了以下代码来进行批量复制
import os
import random
import shutil
import re
GT_from_PATH = "./jsons"
GT_to_PATH = "./gts"
def copy_file(from_dir, to_dir, Name_list):
if not os.path.isdir(to_dir):
os.mkdir(to_dir)
# 1
# name_list = os.listdir(from_dir)
# # 2
# sample = random.sample(pathDir, 2)
# print(sample)
# 3
for name in Name_list:
try:
# print(name)
if not os.path.isfile(os.path.join(from_dir, name)):
print("{} is not existed".format(os.path.join(from_dir, name)))
shutil.copy(os.path.join(from_dir, name), os.path.join(to_dir, name))
# print("{} has copied to {}".format(os.path.join(from_dir, name), os.path.join(to_dir, name)))
except:
# print("failed to move {}".format(from_dir + name))
pass
# shutil.copyfile(fileDir+name, tarDir+name)
print("{} has copied to {}".format(from_dir, to_dir))
if __name__ == '__main__':
filepath_list = os.listdir(GT_from_PATH)
# print(name_list)
for i, file_path in enumerate(filepath_list):
gt_path = "{}/{}_gt.png".format(os.path.join(GT_from_PATH, filepath_list[i]), file_path[:-5])
print("copy {} to ...".format(gt_path))
gt_name = ["{}_gt.png".format(file_path[:-5])]
gt_file_path = os.path.join(GT_from_PATH, file_path)
copy_file(gt_file_path, GT_to_PATH, gt_name)
只需将GT_from_PATH设置为所有json文件夹所在根目录
即可。
参考
-weixin_41831559的博客