python批量数据处理脚本——yolo标签的json格式转darknet格式

在训练yolov5(yolo系列中yolov5用得比较多,就拿yolov5来举例好了)之前,首先需要标定数据集。可能标完几千张之后才发现自己标的竟然是json格式(而yolov5训练需要的是darknet格式),此时你会不会气得吐血,想着又要重新标定了?
又或者你在网上找了非常久,终于找到了一批很满意的数据集,而且还有好心人已经标定好并开源,但是却发现也只有json格式,此时你的心情会不会犹如坐过山车一般,刚看到希望到达顶端,又忽然希望破灭跌到了底端?
不用怕,用python脚本转换一下就可以使用了!

import json
import os
import shutil
import cv2
import os
from numpy.lib.twodim_base import triu_indices_from
import pandas as pd
from glob import glob
import codecs

# Print the version string of the imported cv2 (OpenCV) module.
print(cv2.__version__)


def getBoundingBox(points):
    """Return the axis-aligned bounding box of a sequence of points.

    Args:
        points: Non-empty indexable sequence of points. Each point is
            indexable with x at index 0 and y at index 1. Any number of
            points is accepted, not just two.

    Returns:
        A list ``[xmin, xmax, ymin, ymax]`` where every value has been
        passed through ``int()`` (i.e. truncated toward zero).

    Raises:
        IndexError: If ``points`` is empty.
    """
    # Touch the first element up front so an empty sequence raises
    # IndexError rather than the ValueError min()/max() would give.
    _ = points[0]

    x_values = [pt[0] for pt in points]
    y_values = [pt[1] for pt in points]
    return [
        int(min(x_values)),
        int(max(x_values)),
        int(min(y_values)),
        int(max(y_values)),
    ]


def json2txt(json_path, midTxt_path):
    """Convert one JSON annotation file into an intermediate text file.

    The JSON document must contain the keys ``imageHeight``, ``imageWidth``
    and ``shapes``; every entry of ``shapes`` must contain ``label`` and
    ``points``. For each shape one line is produced::

        <image name> <label> <xmin> <ymin> <xmax> <ymax>

    where the image name is the JSON file's base name with a ``.jpg``
    extension and the box is clamped to ``[0, width - 1] x [0, height - 1]``.
    Each line is also echoed to stdout.

    Args:
        json_path: Path of the JSON annotation file to read.
        midTxt_path: Existing directory in which ``<base name>.txt`` is
            written (UTF-8).

    Raises:
        KeyError: If one of the required JSON keys is missing.
        OSError: If the input cannot be read or the output cannot be written.
    """
    # Use a context manager so the handle is closed promptly, and read as
    # UTF-8 explicitly instead of relying on the platform default encoding.
    with open(json_path, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    img_h = json_data["imageHeight"]
    img_w = json_data["imageWidth"]

    # Strip only the final extension; splitting on ".json" would truncate a
    # name such as "a.jsonl.json" at the first occurrence.
    img_name = os.path.splitext(os.path.basename(json_path))[0]
    name = img_name + '.jpg'            # image file the annotations refer to

    lines = []
    for shape in json_data["shapes"]:
        label_name = shape["label"]
        xmin, xmax, ymin, ymax = getBoundingBox(shape["points"])

        # Clamp the box to valid pixel coordinates of the image.
        xmin = max(xmin, 0)
        ymin = max(ymin, 0)
        xmax = min(xmax, img_w - 1)
        ymax = min(ymax, img_h - 1)

        b = f"{name} {label_name} {xmin} {ymin} {xmax} {ymax}"
        print(b)
        lines.append(b)

    out_path = os.path.join(midTxt_path, img_name + ".txt")
    with open(out_path, 'w', encoding='utf-8') as f:
        # One annotation per line; an image without shapes yields an empty file.
        f.write(''.join(line + '\n' for line in lines))

def txt2darknet(midTxt_path, img_path, output_dir=None):
    """Convert intermediate txt files into darknet-format label files.

    Every ``*.txt`` file in ``midTxt_path`` is expected to hold lines of the
    form ``<image name> <label> <xmin> <ymin> <xmax> <ymax>``. For each file
    the image ``<stem>.jpg`` is read from ``img_path`` to obtain its size,
    and a file ``<stem>.txt`` is written to the output directory with lines::

        <class> <center_x / width> <center_y / height> <w / width> <h / height>

    Each output line is also echoed to stdout.

    Label handling:
        * ``other``, ``mask`` and ``del`` are dropped.
        * ``head`` -> ``0``, ``hat`` -> ``1``, ``helmet`` -> ``2``.
        * any other label is written through unchanged.

    Boxes whose width AND height are both <= 15 pixels are dropped.

    Args:
        midTxt_path: Directory containing the intermediate txt files.
        img_path: Directory containing the matching ``.jpg`` images.
        output_dir: Directory for the darknet label files. Defaults to the
            module-level ``saved_path``. It is created if missing.
    """
    if output_dir is None:
        # Backward compatible default: the original read this global directly.
        output_dir = saved_path
    os.makedirs(output_dir, exist_ok=True)

    skipped_labels = {'other', 'mask', 'del'}
    # User-defined label name -> class index mapping.
    class_ids = {'head': '0', 'hat': '1', 'helmet': '2'}

    for file in os.listdir(midTxt_path):
        filename, ext = os.path.splitext(file)
        if ext != '.txt':
            continue                    # ignore anything that is not a label file
        file_path = os.path.join(midTxt_path, file)
        imgName = filename + '.jpg'
        imgPath = os.path.join(img_path, imgName)

        # cv2.imread returns None (it does not raise) when the image is
        # missing or unreadable; skip the file instead of crashing on .shape.
        img = cv2.imread(imgPath)
        if img is None:
            print('skip ' + file + ': cannot read image ' + imgPath)
            continue
        img_h, img_w = img.shape[:2]

        rows = []
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f1:
            for line in f1:
                # Split from the right so that spaces inside the image name
                # or the label cannot shift the four coordinate fields.
                parts = line.rstrip('\r\n').rsplit(' ', 4)
                if len(parts) < 5:
                    continue            # blank or malformed line
                head = parts[0]
                if head.startswith(imgName + ' '):
                    label = head[len(imgName) + 1:]
                else:
                    label = head.partition(' ')[2]
                if not label or label in skipped_labels:
                    continue
                class_id = class_ids.get(label, label)

                xmin, ymin, xmax, ymax = (float(v) for v in parts[1:])
                w = xmax - xmin
                h = ymax - ymin

                # Drop boxes that are tiny in both dimensions.
                if w <= 15 and h <= 15:
                    continue

                center_x = xmin + w / 2
                center_y = ymin + h / 2
                b = ' '.join([
                    class_id,
                    str(center_x / img_w),
                    str(center_y / img_h),
                    str(w / img_w),
                    str(h / img_h),
                ])
                print(b)
                rows.append(b)

        out_path = os.path.join(output_dir, filename + ".txt")
        with open(out_path, 'w', encoding='utf-8') as f2:
            f2.write(''.join(row + '\n' for row in rows))

# Configuration: replace the placeholder strings below with real paths.
json_path = "json文件的文件夹路径"            # folder holding the .json annotation files
midTxt_path = "中间存放的tmp文件夹会自动删除"   # temporary folder; removed when the run ends
img_path = '图片文件夹路径'                   # folder holding the images
saved_path = '存放darknet格式文件的文件夹路径'  # folder receiving the darknet label files

# makedirs also handles nested paths and already-existing folders. The output
# folder is created too, otherwise the first label file cannot be written.
os.makedirs(midTxt_path, exist_ok=True)
os.makedirs(saved_path, exist_ok=True)

try:
    filelist = os.listdir(json_path)                # every entry of the JSON folder
    for file in filelist:
        old_dir = os.path.join(json_path, file)
        if os.path.isdir(old_dir):
            continue                                # sub-folders are skipped
        if os.path.splitext(file)[1] != ".json":
            continue                                # only .json files are converted
        json2txt(old_dir, midTxt_path)

    txt2darknet(midTxt_path, img_path)
finally:
    # Remove the temporary folder even when a conversion step fails.
    shutil.rmtree(midTxt_path)

下一节讲darknet转json。
有不懂的欢迎私聊或者交流,欢迎大神指正。

你可能感兴趣的:(python)