python解析并修改xml文件

使用 labelImg 标注图片后,需要统一修改标注文件中的标签(label)名称以及图片文件名,因此用 Python 批量重命名图片,并批量修改对应的 xml 标注文件。
首先批量复制并修改图片名称:

# -*- coding: utf-8 -*-
# 将二级目录下的图片/标签文件重命名到同一个新文件夹下

import os, shutil

def batchRenameFile(srcDirName, destDirName, species='1', ext='.jpg'):
    """Copy the files of every sub-folder of srcDirName into destDirName under uniform names.

    Each copied file is named ``<species>_<subDirName>_<n><ext>``, where ``n``
    counts from 1 inside each sub-folder. Files are numbered in sorted name
    order so that repeated runs produce the same numbering.

    Args:
        srcDirName: Folder whose immediate sub-folders hold the data files.
        destDirName: Folder that receives the renamed copies; created if missing.
        species: Prefix placed in front of every new name (defaults to '1').
        ext: Suffix appended to every new name (defaults to '.jpg').
    """
    if not os.path.isdir(destDirName):
        os.makedirs(destDirName)

    for subDirName in sorted(os.listdir(srcDirName)):
        subDirPath = srcDirName + '/' + subDirName
        # Stray files next to the data sub-folders are not data sets; skip them
        # instead of failing in os.listdir().
        if not os.path.isdir(subDirPath):
            continue

        i = 0
        for fileName in sorted(os.listdir(subDirPath)):
            srcPath = subDirPath + '/' + fileName
            # shutil.copy() cannot copy directories, so only plain files count.
            if not os.path.isfile(srcPath):
                continue
            i += 1
            destPath = destDirName + '/' + species + '_' + subDirName + '_' + str(i) + ext
            shutil.copy(srcPath, destPath)
            print(destPath)

接着从txt文件中生成数字和标签对应的字典:

def _readLabelFile(path):
    """Return the (key, value) pairs of a tab-separated label file.

    The key is the first tab-separated field of a line and the value is the
    last one. Blank lines are skipped.
    """
    pairs = []
    with open(path, 'r') as labelFile:
        for line in labelFile:
            # Remove a UTF-8 byte-order mark, both in its decoded form and in
            # the form it takes when the three BOM bytes are read one by one.
            line = line.replace('\xef\xbb\xbf', '').replace('\ufeff', '')
            line = line.rstrip('\r\n')
            if not line.strip():
                continue
            fields = line.split('\t')
            pairs.append((fields[0], fields[-1]))
    return pairs


def creatDic(catFile=r'E:\Cats&Dogs\CatList.txt',
             dogFile=r'E:\Cats&Dogs\DogList.txt',
             dogOffset=42):
    """Build the dictionary that maps number strings to label names.

    Cat entries are stored under the key read from catFile. Dog entries are
    stored under ``str(int(key) + dogOffset)`` so that they do not collide
    with the cat keys. Values never carry a trailing newline.

    Args:
        catFile: Tab-separated file with one "number<TAB>label" pair per line.
        dogFile: Same format as catFile, for the dog labels.
        dogOffset: Amount added to every dog number before it is used as key.

    Returns:
        dict mapping number strings to label names.

    Raises:
        ValueError: If a key in dogFile is not an integer.
    """
    txtDict = {}
    for key, value in _readLabelFile(catFile):
        txtDict[key] = value
    for key, value in _readLabelFile(dogFile):
        txtDict[str(int(key) + dogOffset)] = value
    return txtDict

最后,批量修改 xml 文件中的标签名称(folder 和各 object 的 name),以及 xml 中记录的对应图片文件名和路径:

def _setChildText(parent, tag, text):
    """Set the text of the first child of parent named tag, if there is one."""
    node = parent.find(tag)
    if node is not None:
        node.text = text


def batchRenameFile1(DirName, txtDict, imagePathPrefix="E:\\myVOCdevkit\\VOC2007\\images\\"):
    """Rewrite the image name, image path and label names of every xml file in DirName.

    File names are expected to look like ``<species>_<label>_<n>.xml``. For
    every xml file the ``filename`` node is set to the xml file's base name
    plus ``.jpg`` and the ``path`` node to imagePathPrefix plus that name.
    When species is "1" (cat) the label name is ``txtDict[label]``; when it is
    "2" (dog) it is ``txtDict[str(int(label) + 42)]``. The label name is
    written to the ``folder`` node and to the ``name`` node of every
    ``object`` node. Files are modified in place; entries that do not end in
    ``.xml`` are ignored.

    Args:
        DirName: Folder that contains the xml annotation files.
        txtDict: Mapping from number strings to label names.
        imagePathPrefix: Text placed in front of the image name in ``path``.

    Raises:
        KeyError: If the label number of a cat or dog file is not in txtDict.
    """
    # Imported here so the function does not depend on an import made elsewhere.
    from xml.etree.ElementTree import parse

    for FileName in sorted(os.listdir(DirName)):
        if not FileName.lower().endswith('.xml'):
            continue
        # os.path.join keeps the path valid on systems whose separator is not '\'.
        FilePath = os.path.join(DirName, FileName)
        print(FilePath)
        doc = parse(FilePath)
        root = doc.getroot()

        stem = FileName.split(".")[0]
        name = stem + ".jpg"
        _setChildText(root, "filename", name)
        _setChildText(root, "path", imagePathPrefix + name)

        # Split the extension-free name so a two-part name such as "1_3.xml"
        # yields the label "3" rather than "3.xml".
        parts = stem.split("_")
        species = parts[0]
        labelName = None
        if len(parts) > 1:
            if species == "1":                 # cat
                labelName = txtDict[parts[1]]
            elif species == "2":               # dog keys are shifted by 42
                labelName = txtDict[str(int(parts[1]) + 42)]

        if labelName is not None:
            _setChildText(root, "folder", labelName)
            # A picture may hold several annotated objects; relabel all of them.
            for objectNode in root.findall("object"):
                _setChildText(objectNode, "name", labelName)

        doc.write(FilePath)

完整源文件详见github:https://github.com/vivianLL/CatDogNames

你可能感兴趣的:(python)