# 查资料后发现,原因是:labelme 打标时一般都是多边形,所以 points 的个数必然大于等于 3;但是我的数据集中存在只有一个 point 的标签,所以加了一个 fause_json 函数来筛选出错的 json,以便转化可以顺利进行。
import argparse
import json
import os
import os.path as osp
import warnings
import numpy as np
warnings.filterwarnings('ignore')
import PIL.Image
import base64
def fause_json(jsonfilelist, json_path, fause_json_list):
    """Collect the JSON files that contain a shape with fewer than 3 points.

    Each name in ``jsonfilelist`` is opened relative to ``json_path`` and
    parsed as JSON. A file is reported when any entry of its ``'shapes'``
    list has a ``'points'`` list shorter than 3.

    Args:
        jsonfilelist: File names (not full paths) to inspect, e.g. the
            result of ``os.listdir(json_path)``. Names that do not end in
            ``.json`` are skipped so that other files sitting in the same
            directory do not abort the scan.
        json_path: Directory that contains the files.
        fause_json_list: List that receives the offending file names. It
            is extended in place and also returned.

    Returns:
        ``fause_json_list``, with each offending file name appended once.

    Raises:
        OSError: If a listed ``.json`` file cannot be opened.
        json.JSONDecodeError: If a listed ``.json`` file is not valid JSON.
        KeyError: If a file has no ``'shapes'`` key or a shape has no
            ``'points'`` key.
    """
    for name in jsonfilelist:
        if not name.lower().endswith('.json'):
            continue
        # os.path.join keeps the path portable instead of hard-coding "\\".
        # Binary mode lets the json module detect the UTF-8/16/32 encoding
        # itself rather than depending on the platform's locale encoding.
        with open(os.path.join(json_path, name), "rb") as f:
            jsonfile = json.load(f)
        # any() stops at the first bad shape, so a file is listed only once.
        if any(len(shape['points']) < 3 for shape in jsonfile['shapes']):
            fause_json_list.append(name)
    return fause_json_list
# def main():
# '''
# json 转我们需要的数据格式
# '''
# count = os.listdir("F:\luzice\\") # 自己的路径
# index = 0
# old_jsonlist = os.listdir("F:\luzice") # 自己的路径
# repeat_list = []
# for i in range(0, len(count)):
# if (count[i] not in old_jsonlist) and (count[i] not in fause_json_list):
# path = os.path.join("F:\luzice", count[i])
# if os.path.isfile(path) and path.endswith('json'):
# data = json.load(open(path))
# if data['imageData']:
# imageData = data['imageData']
# else:
# imagePath = os.path.join(os.path.dirname(path), data['imagePath'])
# with open(imagePath, 'rb') as f:
# imageData = f.read()
# imageData = base64.b64encode(imageData).decode('utf-8')
#
# img = utils.img_b64_to_arr(imageData)
# label_name_to_value = {'_background_': 0}
# for shape in data['shapes']:
# label_name = shape['label']
# if label_name in label_name_to_value:
# label_value = label_name_to_value[label_name]
# else:
# label_value = len(label_name_to_value)
# label_name_to_value[label_name] = label_value
# label_values, label_names = [], [] # label_values must be dense
# for ln, lv in sorted(label_name_to_value.items(), key=lambda x: x[1]):
# label_values.append(lv)
# label_names.append(ln)
#
# assert label_values == list(range(len(label_values)))
#
# lbl = utils.shapes_to_label(img.shape, data['shapes'], label_name_to_value)
#
# captions = ['{}:{}'.format(lv, ln)
# for ln, lv in label_name_to_value.items()]
# lbl_viz = utils.draw_label(lbl, img, captions)
#
# if not os.path.exists("D:\\train_dataset"):
# os.mkdir("D:\\train_dataset")
# label_path = "D:\\train_dataset\\mask"
# if not os.path.exists(label_path):
# os.mkdir(label_path)
# img_path = "D:\\train_dataset\\imgs"
# if not os.path.exists(img_path):
# os.mkdir(img_path)
# yaml_path = "D:\\train_dataset\\yaml"
# if not os.path.exists(yaml_path):
# os.mkdir(yaml_path)
# label_viz_path = "D:\\train_dataset\\label_viz"
# if not os.path.exists(label_viz_path):
# os.mkdir(label_viz_path)
#
# PIL.Image.fromarray(img).save(osp.join(img_path, str(index) + '.jpg'))
#
# utils.lblsave(osp.join(label_path, str(index) + '.png'), lbl)
# PIL.Image.fromarray(lbl_viz).save(osp.join(label_viz_path, str(index) + '.png'))
#
# warnings.warn('info.yaml is being replaced by label_names.txt')
# info = dict(label_names=label_names)
# with open(osp.join(yaml_path, str(index) + '.yaml'), 'w') as f:
# yaml.safe_dump(info, f, default_flow_style=False)
# index = index + 1
# print(f"Saved : {str(index)}")
# else:
# repeat_list.append(count[i])
# print(f"{count[i]} 已存在")
#
# print("全部转化完成!")
# return repeat_list
if __name__ == '__main__':
    # Directory that stores the JSON files; replace it with your own path.
    # Raw string: "\l" is not a valid escape sequence, so the non-raw form
    # triggers an invalid-escape warning on recent Python versions.
    json_path = r"F:\luzice"
    jsonfilelist = os.listdir(json_path)
    fause_json_list = fause_json(jsonfilelist, json_path, [])
    print(np.array(fause_json_list))
    # repeat_list = main()
    # The two lists of interest after running this script:
    # fause_json_list: JSON files that cannot be converted because of their
    #     shapes;
    # repeat_list: JSON files that duplicate already existing JSON files
    #     (only available when the main() call above is enabled).
# 参考地址:labelme 打标数据 json_to_dataset,以及 yaml 文件标签替换问题