labelme json2dataset.py 源代码解析

json2dataset.py 的源代码有 2 个特点:

  • label value 是按照 label name 字母顺序排列的
  • label color 通过 labelme.utils colormap 生成

这样带来的问题是:label value 只由当前 json 文件中出现的类别决定,所以不同图片里同一类别对应的 label value 和 color 可能不同。因此要先为每个类别固定 value 和 color(下面的代码从 sun37.csv 中读取)

1. json2labelpng.py 简易版

功能:将 json 文件转成上色后的 png

import base64
import json
import os
import PIL.Image
from labelme import utils

import numpy as np
import csv

# Class name -> [R, G, B], loaded from the dataset's colour table. The dict
# keeps the CSV row order, which the value mapping below relies on.
color_dict = {}
# newline='' lets the csv module do its own line-ending handling, as its
# documentation requires.
with open('sun37.csv', 'r', newline='') as csv_file:
    reader = csv.reader(csv_file)
    next(reader, None)  # skip the header row (no-op on an empty file)
    for row in reader:
        if not row:  # tolerate blank lines instead of raising IndexError
            continue
        color_dict[row[0]] = [int(row[1]), int(row[2]), int(row[3])]

# label name -> value (position of the class in the CSV, header excluded).
# Used later to replace the values labelme would generate on its own with the
# values of the big dataset.
label_val_dict = {name: value for value, name in enumerate(color_dict)}

# RGB colour table: row i holds the colour of label value i.
color_map = np.array(list(color_dict.values()))  # (38, 3) with the author's sun37.csv


def json2labelpng(json_file):
    """Convert one labelme JSON annotation into a palette-coloured label PNG.

    Label values are taken from the module-level ``label_val_dict`` and the
    colours from ``color_map``, instead of the per-file values and colormap
    labelme would generate. The PNG is written next to the JSON file, using
    the same base name with a ``.png`` extension.

    Args:
        json_file: Path to the labelme JSON annotation file.

    Raises:
        KeyError: If a shape uses a label that is not in ``label_val_dict``.
    """
    # Use a context manager so the JSON file handle is closed right away.
    with open(json_file) as f:
        data = json.load(f)  # json -> dict

    image_data = data.get('imageData')
    if not image_data:
        # No embedded image: load the file referenced by 'imagePath', which is
        # resolved relative to the JSON file's directory.
        image_path = os.path.join(os.path.dirname(json_file), data['imagePath'])
        with open(image_path, 'rb') as f:
            image_data = base64.b64encode(f.read()).decode('utf-8')

    img = utils.img_b64_to_arr(image_data)

    # Use the existing label_val_dict (and, below, the existing colour map).
    label_name_to_value = {
        'background': 0  # 0 is the background
    }
    for shape in data['shapes']:
        label = shape['label']
        if label not in label_val_dict:
            # Same exception type as the plain dict lookup, but say which
            # label and which file are at fault.
            raise KeyError(
                'label {!r} used in {} is not listed in the colour table'
                .format(label, json_file))
        label_name_to_value[label] = label_val_dict[label]

    # lbl is an array of label values; it carries no colour yet.
    lbl = utils.shapes_to_label(img.shape, data['shapes'], label_name_to_value)
    # Author's example: np.unique(lbl) -> [ 0  1  2  3  5 21 22 25 29],
    # i.e. the values now agree with the SUN dataset.

    # Colour with our own colormap and save as the label PNG.
    lbl_pil = PIL.Image.fromarray(lbl.astype(np.uint8), mode='P')  # palette mode
    lbl_pil.putpalette(color_map.astype(np.uint8).flatten())  # uint8, flattened
    # Swap only the file extension; str.replace would also rewrite any
    # '.json' occurring elsewhere in the path (e.g. in a directory name).
    lbl_pil.save(os.path.splitext(json_file)[0] + '.png')


if __name__ == '__main__':
    # Demo run: convert a single sample annotation; the PNG is written next
    # to the JSON file.
    sample_json = 'C:/Users/Shuai/PycharmProjects/Toy/labelme/0.json'
    json2labelpng(json_file=sample_json)
2. json2dataset.py 源代码解析
import argparse
import base64
import json
import os
import os.path as osp

import PIL.Image
import yaml

from labelme.logger import logger
from labelme import utils

import numpy as np
import csv

# Class name -> [R, G, B], loaded from the dataset's colour table. The dict
# keeps the CSV row order, which the value mapping below relies on.
color_dict = {}
# newline='' lets the csv module do its own line-ending handling, as its
# documentation requires.
with open('sun37.csv', 'r', newline='') as csv_file:
    reader = csv.reader(csv_file)
    next(reader, None)  # skip the header row (no-op on an empty file)
    for row in reader:
        if not row:  # tolerate blank lines instead of raising IndexError
            continue
        color_dict[row[0]] = [int(row[1]), int(row[2]), int(row[3])]

# label name -> value (position of the class in the CSV, header excluded).
# Used later to replace the values labelme would generate on its own with the
# values of the big dataset.
label_val_dict = {name: value for value, name in enumerate(color_dict)}

# RGB colour table: row i holds the colour of label value i.
color_map = np.array(list(color_dict.values()))  # (38, 3) with the author's sun37.csv


def main(params):
    """Convert one labelme JSON file into a small dataset folder.

    Writes two images into the output directory: ``img.png`` (the source
    image) and ``label.png`` (a palette PNG whose pixel values come from the
    module-level ``label_val_dict`` and whose colours come from
    ``color_map``).

    Args:
        params: Command-line style argument list: the JSON file path,
            optionally followed by ``-o``/``--out`` and an output directory.
            Without ``--out`` the output goes to a folder next to the JSON
            file, named after it with '.' replaced by '_' (e.g. ``0_json``).

    Raises:
        KeyError: If a shape uses a label that is not in ``label_val_dict``.
    """
    # The original literals were concatenated without separating spaces
    # ("theJSON", "handlemultiple"); the trailing spaces fix the message.
    logger.warning('This script is aimed to demonstrate how to convert the '
                   'JSON file to a single image dataset, and not to handle '
                   'multiple JSON files to generate a real-use dataset.')

    parser = argparse.ArgumentParser()
    parser.add_argument('json_file')
    parser.add_argument('-o', '--out', default=None)
    args = parser.parse_args(params)

    json_file = args.json_file

    if args.out is None:  # no output path given: use e.g. 0_json as the folder
        out_dir = osp.basename(json_file).replace('.', '_')
        out_dir = osp.join(osp.dirname(json_file), out_dir)
    else:
        out_dir = args.out
    if not osp.exists(out_dir):  # osp is os.path
        # makedirs also creates missing parent folders of a nested --out path.
        os.makedirs(out_dir)

    # Use a context manager so the JSON file handle is closed right away.
    with open(json_file) as f:
        data = json.load(f)  # json -> dict

    image_data = data.get('imageData')
    if not image_data:
        # No embedded image: load the file referenced by 'imagePath', which is
        # resolved relative to the JSON file's directory.
        image_path = os.path.join(os.path.dirname(json_file), data['imagePath'])
        with open(image_path, 'rb') as f:
            image_data = base64.b64encode(f.read()).decode('utf-8')
    img = utils.img_b64_to_arr(image_data)

    # Map label names to their numeric values.

    # How labelme itself builds label_name_to_value
    # =======================================
    # label_name_to_value = {
    #     '_background_': 0  # 0 is the background
    # }
    # for shape in sorted(data['shapes'], key=lambda x: x['label']):  # sorted by label name
    #     label_name = shape['label']
    #     if label_name in label_name_to_value:
    #         label_value = label_name_to_value[label_name]
    #     else:
    #         label_value = len(label_name_to_value)  # current dict length is the id of the newly added label
    #         label_name_to_value[label_name] = label_value
    # lbl = utils.shapes_to_label(img.shape, data['shapes'], label_name_to_value)
    # lbl is an image made of label values (480x640 in the author's example), not coloured yet
    # print(np.unique(lbl))  # [0 1 2 3 4 5 6 7 8]; these values do not match the SUN dataset's label values, so the mapping must go through class names
    # label_colormap is used here to generate the colours
    # utils.lblsave(osp.join(out_dir, 'label.png'), lbl)
    # =======================================

    # Use the existing label_val_dict (and, below, the existing colour map).
    label_name_to_value = {
        'background': 0  # 0 is the background
    }
    for shape in data['shapes']:
        label = shape['label']
        if label not in label_val_dict:
            # Same exception type as the plain dict lookup, but say which
            # label and which file are at fault.
            raise KeyError(
                'label {!r} used in {} is not listed in the colour table'
                .format(label, json_file))
        label_name_to_value[label] = label_val_dict[label]

    lbl = utils.shapes_to_label(img.shape, data['shapes'], label_name_to_value)
    # Author's example: np.unique(lbl) -> [ 0  1  2  3  5 21 22 25 29],
    # i.e. the values now agree with the SUN dataset.

    # Because the values were changed, the original feature below no longer works
    # label_names = [None] * (max(label_name_to_value.values()) + 1)  # fixed-length list
    # for name, value in label_name_to_value.items():  # dict
    #     label_names[value] = name
    # lbl_viz = utils.draw_label(lbl, img, label_names)

    # labelme saves three images: img.png, label.png, label_viz.png
    # (label_viz.png is disabled below).
    PIL.Image.fromarray(img).save(osp.join(out_dir, 'img.png'))

    # Colour with our own colormap and save as label.png.
    lbl_pil = PIL.Image.fromarray(lbl.astype(np.uint8), mode='P')  # palette mode
    lbl_pil.putpalette(color_map.astype(np.uint8).flatten())  # uint8, flattened
    lbl_pil.save(osp.join(out_dir, 'label.png'))

    # label_viz.png
    # PIL.Image.fromarray(lbl_viz).save(osp.join(out_dir, 'label_viz.png'))

    # with open(osp.join(out_dir, 'label_names.txt'), 'w') as f:
    #     for lbl_name in label_names:
    #         f.write(lbl_name + '\n')

    # logger.warning('info.yaml is being replaced by label_names.txt')
    # info = dict(label_names=label_names)
    # with open(osp.join(out_dir, 'info.yaml'), 'w') as f:
    #     yaml.safe_dump(info, f, default_flow_style=False)
    #
    # logger.info('Saved to: {}'.format(out_dir))


if __name__ == '__main__':
    # Demo invocation: the first entry is the positional JSON file argument,
    # followed by the explicit output folder.
    main(['0.json', '-o', '0_json'])

你可能感兴趣的:(labelme json2dataset.py 源代码解析)