数据集标签xml格式和单个json文件转换成yolo的txt格式

1.单个json文件转yolo的txt格式

将单个json文件转换成每张图片各对应一个txt文件的格式。对应的开源数据集有FLIR数据集(FREE - FLIR Thermal Dataset for Algorithm Training | Teledyne FLIR)。

# -*- coding:utf-8 -*-
import json
import os 
from os import listdir, getcwd
from os.path import join
import os.path
import glob
import cv2
rootdir='./data/'#写自己存放图片的数据地址
labeldir='./label_txt/'
imgdir='./lwir/'

 
def convert(size, bbox):#该函数将xmin,ymin,xmax,ymax转为x,y,w,h中心点坐标和宽高
    dw = 1./(size[0])
    dh = 1./(size[1])
    x = bbox['x']
    y = bbox['y']
    w = bbox['w']
    h = bbox['h']
    x = x+w/2
    y = y+h/2
    x = x*dw
    w = w*dw
    y = y*dh
    h = h*dh
    return (x,y,w,h)
 


def choose_label(label,bbox,label_txt):
    if (label in ['person']):
       cls_id=0
       label_txt.write(str(cls_id) + " " + " ".join([str(a) for a in bbox]) + '\n')
    elif (label in ['bike','motor','rider']):
       cls_id=1
       label_txt.write(str(cls_id) + " " + " ".join([str(a) for a in bbox]) + '\n')
    elif (label in ['car']):
       cls_id=2
       label_txt.write(str(cls_id) + " " + " ".join([str(a) for a in bbox]) + '\n')
    elif (label in ['bus']):
       cls_id=3
       label_txt.write(str(cls_id) + " " + " ".join([str(a) for a in bbox]) + '\n')
    elif (label in ['truck']):
       cls_id=4
       label_txt.write(str(cls_id) + " " + " ".join([str(a) for a in bbox]) + '\n')
    elif (label in ['light']):
       cls_id=5
       label_txt.write(str(cls_id) + " " + " ".join([str(a) for a in bbox]) + '\n')   
    elif (label in ['sign']):
       cls_id=6
       label_txt.write(str(cls_id) + " " + " ".join([str(a) for a in bbox]) + '\n') 


def image_id(rootdir):
    a=[]
    paths = glob.glob(os.path.join(rootdir, '*.json'))
    paths.sort()
    #print(paths)
    for filename in paths:
            filename=filename.split(rootdir)[1].split('.json')[0]
            print(filename)
            a.append(filename)
    return a



load_f=open('./index.json','r')
index_json_dict = json.load(load_f)
frames=index_json_dict['frames']
num_frames=len(frames)
num_label=50000

for i in range(num_frames):
    frame=frames[i]
    video_data=frame['videoMetadata']
    video_name=video_data['videoId']
    frame_index=video_data['frameIndex']
    frame_id=frame['datasetFrameId']
    frame_name='video-'+video_name+'-frame-'+str(frame_index).zfill(6)+'-'+frame_id+'.jpg'
    print(frame_name)
    W=int(frame['width'])
    H=int(frame['height'])
    #print('w',W,'h',H)
    annotations=frame['annotations']
    state_write=0
    for j in range(len(annotations)):
        annotation=annotations[j]
        labels=annotation['labels']
        label=labels[0]
        print(label)
        if (label in ['person','bike','motor','rider','car','bus','truck','light','sign']): 
           state_write=1
           print('1')
    if state_write==1:
       print('write!')
       label_txt = open(labeldir+'%s.txt'%(str(num_label).zfill(6)), 'w')
       for j in range(len(annotations)):
           annotation=annotations[j]
           labels=annotation['labels']
           label=labels[0]
           bboxes=annotation['boundingBox']
           #print(bboxes)
           bbox=convert((W,H),bboxes)
           #print(bbox)
           choose_label(label,bbox,label_txt)
       img=cv2.imread(rootdir+frame_name)
       cv2.imwrite(imgdir+str(num_label).zfill(6)+'.jpg',img)
       num_label=num_label+1

2.xml格式转yolo的txt格式

每个图片都对应一个xml标签,将其转成每张图片都对应一个yolo的txt标签,对应的开源数据集有MFNet dataset,https://www.mi.t.u-tokyo.ac.jp/static/projects/mil_multispectral/

# coding: utf-8

import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
import cv2
from sklearn.model_selection import train_test_split

classes = ['person', 'car', 'bike', 'color_cone', 'car_stop', 'bump', 'hole', 'animal', 'unknown']



size_dict = {
    "rgb": [320, 256],
    "finf": [640, 480],
    "ninf": [320, 256],
    "minf": [320, 256]
}


image_ids_dict = {}
image_ids_all = os.listdir('./Annotations_Converted/')
image_ids_all = [x.replace('.xml', '') for x in image_ids_all]


train_ids, test_ids = train_test_split(image_ids_all, test_size=0.2, random_state=42)
image_ids_dict['train'] = train_ids
image_ids_dict['test'] = test_ids


# In[65]:
def check(x):
    if x<0:
       x=0
    elif x>1:
       x=1
    return x
    
def convert(size, box):
    dw = 1./size[0]
    dh = 1./size[1]
    x = (box[0] + box[1])/2.0
    y = (box[2] + box[3])/2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x*dw
    w = w*dw
    y = y*dh
    h = h*dh
    x=check(x)
    y=check(y)
    w=check(w)
    h=check(h)
    return (x,y,w,h)

def judge(image_id, modal):
    in_file = open('./Annotations_Converted/%s.xml'%(image_id))
    tree=ET.parse(in_file)
    root = tree.getroot()
    root  = root.find(modal)

    w, h = size_dict[modal]
    state=0

    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls in ['person','bike','car'] and int(difficult) != 1:
           state=1
    return state
           
        
def convert_annotation(image_id, modal, num):
    in_file = open('./Annotations_Converted/%s.xml'%(image_id))
    out_file = open('./labels_Converted/%s/%s.txt'%(modal, num), 'w')
    tree=ET.parse(in_file)
    root = tree.getroot()
    root  = root.find(modal)

    w, h = size_dict[modal]
    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w,h), b)

        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            print('empty')
            continue
        #cls_id = classes.index(cls)
        elif (cls in ['person']):
           cls_id=0
           print(cls_id)
           out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
        elif (cls in ['bike']):
           cls_id=1
           print(cls_id)
           out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
        elif (cls in ['car']):
           cls_id=2
           print(cls_id)
           out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

        
       
        


modals = ['rgb']

'''for modal in modals:
    for image_set in ['train', 'test']:
        if not os.path.exists('./labels/%s'%(modal)):
            os.makedirs('./labels/%s'%(modal))
        
        image_ids = image_ids_dict[image_set]
        list_file = open('%s_%s.txt'%(modal, image_set), 'w')
        for image_id in image_ids:
            list_file.write('./Images/%s/%s%s.bmp\n'%(modal, modal, image_id))
            convert_annotation(image_id, modal)
        list_file.close()'''

num=70000
path='./ConvertedImages/rgb/'
path_list=os.listdir(path)
path_list.sort(key=lambda x: int(x.split('.')[0]))
num_file=len(path_list)
for i in range(num_file):
    image_id=str(path_list[i].split('.')[0])
    print(image_id)
    state=judge(image_id, 'rgb')
    if state==1:
       convert_annotation(image_id, 'rgb',str(num).zfill(6))
       img_rgb=cv2.imread('./ConvertedImages/rgb/'+image_id+'.png')
       img_t=cv2.imread('./ConvertedImages/finf/'+image_id+'.png')
       cv2.imwrite('./visible/'+str(num).zfill(6)+'.jpg',img_rgb)
       cv2.imwrite('./lwir/'+str(num).zfill(6)+'.jpg',img_t)
       num=num+1
    


print ("Finished!!!")

你可能感兴趣的:(xml,json,python)