python将labelImg标注的xml文件批量转为csv文件

labelImg标注好的xml文件及文件内容格式如下:
python将labelImg标注的xml文件批量转为csv文件_第1张图片
python将labelImg标注的xml文件批量转为csv文件_第2张图片
批量转换
创建xml2csv.py文件,文件内容如下:

# -*- coding:utf-8 -*-

import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
import random

def _parse_annotation(xml_file):
    """Return one row tuple per ``<object>`` element found in *xml_file*.

    Each row is ``(filename, width, height, class, xmin, ymin, xmax, ymax)``.
    Elements are looked up by tag name rather than by child position, so an
    optional or reordered child element cannot shift the fields.
    """
    root = ET.parse(xml_file).getroot()
    objects = root.findall('object')
    if not objects:
        # Nothing annotated in this file; do not touch the other elements.
        return []

    filename = root.find('filename').text
    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)

    rows = []
    for member in objects:
        box = member.find('bndbox')
        rows.append((filename,
                     width,
                     height,
                     member.find('name').text,
                     int(box.find('xmin').text),
                     int(box.find('ymin').text),
                     int(box.find('xmax').text),
                     int(box.find('ymax').text)))
    return rows


def xml_to_csv(path, rate=0.8):
    """Collect the annotation XML files under *path* into train/test tables.

    The XML files are shuffled with ``random.shuffle`` and the first
    ``int(file_count * rate)`` files go to the training set, the rest to the
    test set. The split is made per file, so all objects of one image land in
    the same set. Seed ``random`` beforehand for a reproducible split.

    Args:
        path: Directory containing the ``*.xml`` annotation files.
        rate: Fraction of the XML files assigned to the training set.

    Returns:
        A ``(train_df, test_df)`` tuple of pandas DataFrames with the columns
        ``filename, width, height, class, xmin, ymin, xmax, ymax``; one row
        per annotated object.
    """
    xml_files = glob.glob(os.path.join(path, '*.xml'))
    random.shuffle(xml_files)
    # Loop-invariant: computed once from the list that is actually iterated.
    num_of_train = int(len(xml_files) * rate)

    train_rows = []
    test_rows = []
    for index, xml_file in enumerate(xml_files):
        target = train_rows if index < num_of_train else test_rows
        target.extend(_parse_annotation(xml_file))

    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(train_rows, columns=column_name)
    xml_df_test = pd.DataFrame(test_rows, columns=column_name)
    return xml_df, xml_df_test

def main():
    """Convert the annotation XML files to train/test CSV label files.

    Reads the XML files from ``image_path`` via ``xml_to_csv`` and writes the
    two resulting tables to ``csv_save_path`` and ``csv_save_path_test``.
    The paths are hard-coded; adjust them to your own layout.
    """
    # Directory holding the XML annotation files; adjust to your own location.
    image_path = os.path.join('E:/回字四点数据/test', 'jpg')
    # Where the CSV files are saved; adjust as needed.
    # Training set
    csv_save_path = 'E:/test/data/train_labels.csv'
    # Test set
    csv_save_path_test = 'E:/test/data/test_labels.csv'

    xml_df, xml_df_test = xml_to_csv(image_path)
    # index=False (the documented bool form) omits the row-index column.
    xml_df.to_csv(csv_save_path, index=False)
    xml_df_test.to_csv(csv_save_path_test, index=False)
    print('Successfully converted xml to csv.')


# Only run the conversion when executed as a script, not when imported.
if __name__ == '__main__':
    main()

转换成的csv文件内容如下:
python将labelImg标注的xml文件批量转为csv文件_第3张图片
注意:我使用的labelImg版本生成的xml文件中,filename节点里的图片名不带.jpg后缀,因此转换得到的csv文件第一列filename同样都没有.jpg后缀。

python修改csv文件某一列统一加后缀
创建modify_csv.py文件,内容如下:

# -*- coding:utf-8 -*-

import numpy as np
import pandas as pd

def add_filename_suffix(src_csv, dst_csv, suffix='.jpg'):
    """Append *suffix* to every value of the ``filename`` column of a CSV.

    Args:
        src_csv: Path of the CSV file to read; it must have a ``filename``
            column.
        dst_csv: Path the modified CSV is written to (without an index column).
        suffix: Text appended to each file name; names that already end with
            it are left unchanged.
    """
    # Read the column as text so that names such as "001" keep their leading
    # zeros and non-numeric names are accepted.
    labels = pd.read_csv(src_csv, dtype={'filename': str})
    labels['filename'] = labels['filename'].apply(
        lambda name: name if name.endswith(suffix) else name + suffix)
    labels.to_csv(dst_csv, index=False)


if __name__ == '__main__':
    # Add the .jpg suffix to the filename column of both label files.
    add_filename_suffix("train_labels.csv", "train_labels_new.csv")
    add_filename_suffix("test_labels.csv", "test_labels_new.csv")

对filename一列统一加后缀后的csv文件如下:
python将labelImg标注的xml文件批量转为csv文件_第4张图片

你可能感兴趣的:(目标检测,python,xml,csv)