json文件批量转化——labelme制备分割数据集

Labelme制备分割数据集

使用方法

  1. 在cmd中输入activate labelme激活labelme环境。
  2. 在激活环境中输入labelme即可打开labelme界面。
  3. 退出输入deactivate即可。

json文件转化为图像文件

使用labelme交互界面保存标注结果,会得到.json文件,其中保存了标注label信息。而对于图像分割任务,需要其对应的标注是.png/.bmp等格式的图像文件。

单个json文件的转化可通过以下步骤实现:

(1) 打开cmd激活labelme环境
(2) 运行labelme_json_to_dataset <文件名>.json

而实际中我们希望能对文件夹下的多个json文件进行批量处理。这时需要修改Anaconda安装路径\envs\labelme\Lib\site-packages\labelme\cli下的json_to_dataset.py文件(其中envs\labelme即前面激活的labelme环境所在目录)。参考博客中的代码在module的调用上存在问题,我们修正了module的调用关系,将代码写成了以下内容(注意:修改之后,visualize图像中的label颜色并不正确)。

import argparse
import json
import os
import os.path as osp
import warnings
import copy


import numpy as np
import PIL.Image
from skimage import io
import yaml

from labelme import utils

def main():
    """Batch-convert the labelme ``.json`` files of a directory to images.

    Command line:
        json_file   directory that contains the labelme ``.json`` files
        -o, --out   optional directory under which the per-file output
                    folders are created; when omitted they are created
                    relative to the current working directory.

    For every ``<stem>.json`` a folder named ``<stem>_json`` is created
    containing ``<stem>.png`` (decoded image), ``<stem>_gt.png`` (label
    image), ``<stem>_viz.png`` (visualisation), ``label_names.txt`` and
    ``info.yaml``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('json_file',
                        help='directory containing labelme .json files')
    parser.add_argument('-o', '--out', default=None,
                        help='directory in which output folders are created')
    args = parser.parse_args()

    json_dir = args.json_file
    # osp.join('', name) == name, so omitting --out keeps the outputs
    # relative to the current working directory.
    out_root = args.out if args.out is not None else ''

    for entry in sorted(os.listdir(json_dir)):
        path = osp.join(json_dir, entry)
        stem, ext = osp.splitext(entry)
        # Only regular *.json files are annotations; skip everything else
        # instead of failing inside json.load.
        if not osp.isfile(path) or ext.lower() != '.json':
            continue

        with open(path) as f:
            data = json.load(f)

        img = utils.image.img_b64_to_arr(data['imageData'])
        lbl, lbl_names = utils.shape.labelme_shapes_to_label(img.shape, data['shapes'])

        # Caption i describes the i-th label name of this image.
        captions = ['%d: %s' % (l, name) for l, name in enumerate(lbl_names)]
        lbl_viz = utils.draw.draw_label(lbl, img, captions)

        # "foo.json" -> "foo_json"
        out_dir = osp.join(out_root, entry.replace('.', '_'))
        if not osp.isdir(out_dir):
            os.makedirs(out_dir)

        PIL.Image.fromarray(img).save(osp.join(out_dir, '{}.png'.format(stem)))
        PIL.Image.fromarray(lbl).save(osp.join(out_dir, '{}_gt.png'.format(stem)))
        PIL.Image.fromarray(lbl_viz).save(osp.join(out_dir, '{}_viz.png'.format(stem)))

        with open(osp.join(out_dir, 'label_names.txt'), 'w') as f:
            for lbl_name in lbl_names:
                f.write(lbl_name + '\n')

        warnings.warn('info.yaml is being replaced by label_names.txt')
        info = dict(label_names=lbl_names)
        with open(osp.join(out_dir, 'info.yaml'), 'w') as f:
            yaml.safe_dump(info, f, default_flow_style=False)

        print('Saved to: %s' % out_dir)


# Run the batch conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()

以上代码可以将json文件中的label存储为png图像文件。但是存在一个问题:对于多类分割任务,任意一张图可能不包含所有分类。因此整个文件夹下生成的所有label图像中,不同图像中的相同类别的目标在label.png中可能对应不同的灰度值,使标注的label不具备统一性,因而出错。

为了解决该问题,将代码改为以下形式:

import argparse
import json
import os
import os.path as osp
import warnings
import copy

import numpy as np
import PIL.Image
from skimage import io
import yaml

from labelme import utils

# Global class-name -> pixel-value table. main() looks every label name of
# an image up in this table, so a class receives the same value in every
# generated label image. A label name that is missing here makes the lookup
# in main() fail.
NAME_LABEL_MAP = {
    '_background_': 0,
    "baseball_diamond": 1,
    "tennis_court": 2,
    "basketball_court": 3,
    "ground_track_field": 4,
}

# Pixel-value -> class-name table. It is not referenced by the code visible
# in this file.
# NOTE(review): this table lists ten classes and numbers them differently
# from NAME_LABEL_MAP (e.g. "baseball_diamond" is 4 here but 1 above) -
# confirm which numbering is intended before relying on it.
LABEL_NAME_MAP = {
    0: '_background_',
    1: "airplane",
    2: "ship",
    3: "storage_tank",
    4: "baseball_diamond",
    5: "tennis_court",
    6: "basketball_court",
    7: "ground_track_field",
    8: "harbor",
    9: "bridge",
    10: "vehicle",
}


def main():
    """Batch-convert labelme ``.json`` files using one global label table.

    Command line:
        json_file   directory that contains the labelme ``.json`` files
        -o, --out   optional directory under which the per-file output
                    folders are created; when omitted they are created
                    relative to the current working directory.

    The per-image label values returned by labelme are remapped through
    ``NAME_LABEL_MAP`` so that a class has the same pixel value in every
    generated ``<stem>_gt.png``. For every ``<stem>.json`` a folder named
    ``<stem>_json`` is created containing ``<stem>.png``, ``<stem>_gt.png``,
    ``<stem>_viz.png``, ``label_names.txt`` and ``info.yaml``.

    Raises:
        KeyError: if an annotation uses a label name that is not a key of
            ``NAME_LABEL_MAP``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('json_file',
                        help='directory containing labelme .json files')
    parser.add_argument('-o', '--out', default=None,
                        help='directory in which output folders are created')
    args = parser.parse_args()

    json_dir = args.json_file
    # osp.join('', name) == name, so omitting --out keeps the outputs
    # relative to the current working directory.
    out_root = args.out if args.out is not None else ''

    # One caption per global label value, positioned at that value, so the
    # caption text agrees with the remapped pixel values.
    # NOTE(review): assumes draw_label treats the caption position as the
    # label value, as the enumerate-based captions did before - confirm.
    captions = [str(value) for value in range(max(NAME_LABEL_MAP.values()) + 1)]
    for name, value in NAME_LABEL_MAP.items():
        captions[value] = '%d: %s' % (value, name)

    for entry in sorted(os.listdir(json_dir)):
        path = osp.join(json_dir, entry)
        stem, ext = osp.splitext(entry)
        # Only regular *.json files are annotations; skip everything else
        # instead of failing inside json.load.
        if not osp.isfile(path) or ext.lower() != '.json':
            continue

        with open(path) as f:
            data = json.load(f)

        img = utils.image.img_b64_to_arr(data['imageData'])
        # lbl_names maps each label name of this image to its per-image value.
        lbl, lbl_names = utils.shape.labelme_shapes_to_label(img.shape, data['shapes'])

        unknown = sorted(name for name in lbl_names if name not in NAME_LABEL_MAP)
        if unknown:
            raise KeyError('%s: labels %s are missing from NAME_LABEL_MAP'
                           % (path, unknown))

        # Masks are taken from the untouched ``lbl`` while writing into a
        # copy, so an already remapped value is never remapped a second time.
        remapped = lbl.copy()
        for name, old_value in lbl_names.items():
            remapped[lbl == old_value] = NAME_LABEL_MAP[name]

        # uint8 covers label values 0-255 and is stored by PIL as an 8-bit
        # grayscale image.
        lbl = remapped.astype(np.uint8)
        lbl_names = {name: NAME_LABEL_MAP[name] for name in lbl_names}

        lbl_viz = utils.draw.draw_label(lbl, img, captions)

        # "foo.json" -> "foo_json"
        out_dir = osp.join(out_root, entry.replace('.', '_'))
        if not osp.isdir(out_dir):
            os.makedirs(out_dir)

        PIL.Image.fromarray(img).save(osp.join(out_dir, '{}.png'.format(stem)))
        PIL.Image.fromarray(lbl).save(osp.join(out_dir, '{}_gt.png'.format(stem)))
        PIL.Image.fromarray(lbl_viz).save(osp.join(out_dir, '{}_viz.png'.format(stem)))

        with open(osp.join(out_dir, 'label_names.txt'), 'w') as f:
            for lbl_name in lbl_names:
                f.write(lbl_name + '\n')

        warnings.warn('info.yaml is being replaced by label_names.txt')
        info = dict(label_names=lbl_names)
        with open(osp.join(out_dir, 'info.yaml'), 'w') as f:
            yaml.safe_dump(info, f, default_flow_style=False)

        print('Saved to: %s' % out_dir)


# Run the batch conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()

通过建立全局标签值字典,可以控制label图像中目标对应相同的灰度值,从而保证标签在所有图像中的一致性。同时我们修改了图像存储时的命名,使其命名与原图命名对应。

批量处理的步骤如下:

  1. 打开cmd,激活labelme环境,同时进入到文件保存路径下;

  2. 输入命令如下:

    labelme_json_to_dataset E:\json

即可实现批量转化。其中E:\json是待批量处理的json文件所在的文件夹。批量处理后生成的文件将存储在执行命令时cmd所处的当前目录下。

从文件夹中批量获取ground truth文件

labelme_json_to_dataset生成的标注图像是每个json文件对应一个文件夹。文件多了以后,手动一张张复制的工作量很大,于是写了以下代码来进行批量复制:

import os
import random
import shutil
import re


# Root directory whose entries are scanned for "<name>_gt.png" files.
GT_from_PATH = "./jsons"
# Directory into which the "<name>_gt.png" files are copied.
GT_to_PATH = "./gts"


def copy_file(from_dir, to_dir, Name_list):
    """Copy the files named in ``Name_list`` from ``from_dir`` to ``to_dir``.

    Args:
        from_dir: directory that holds the source files.
        to_dir: destination directory; created (including missing parent
            directories) when it does not exist yet.
        Name_list: iterable of file names, relative to ``from_dir``.

    A name that is not a regular file in ``from_dir`` is reported and
    skipped; a copy that fails with an ``OSError`` is reported and the
    remaining names are still processed.
    """
    if not os.path.isdir(to_dir):
        os.makedirs(to_dir)

    for name in Name_list:
        src = os.path.join(from_dir, name)
        dst = os.path.join(to_dir, name)
        if not os.path.isfile(src):
            print("{} is not existed".format(src))
            continue
        try:
            shutil.copy(src, dst)
        except OSError as err:
            # Best effort: report the failure and carry on with the rest.
            print("failed to copy {}: {}".format(src, err))
    print("{} has copied to {}".format(from_dir, to_dir))


if __name__ == '__main__':
    # Collect "<stem>_gt.png" from every sub-directory of GT_from_PATH into
    # GT_to_PATH.
    for dir_name in os.listdir(GT_from_PATH):
        json_dir = os.path.join(GT_from_PATH, dir_name)
        # Only directories can hold a ground-truth image; skip stray files.
        if not os.path.isdir(json_dir):
            continue
        # Drop the last five characters of the folder name (the "_json"
        # suffix of a "<stem>_json" folder) to get the image stem.
        gt_name = "{}_gt.png".format(dir_name[:-5])
        print("copy {} to ...".format("{}/{}".format(json_dir, gt_name)))
        copy_file(json_dir, GT_to_PATH, [gt_name])

只需将GT_from_PATH设置为所有json转化结果文件夹(形如xxx_json)所在的根目录,并将GT_to_PATH设置为ground truth图像的保存目录即可。


参考

-weixin_41831559的博客

你可能感兴趣的:(图像分割)