使用 labelImg 标注图片后,需要统一修改图片文件名,以及 XML 标注文件中记录的标签名称和图片名,因此用 Python 批量重命名图片并修改 XML 文件。
首先批量复制并修改图片名称:
# -*- coding: utf-8 -*-
# 将二级目录下的图片/标签文件重命名到同一个新文件夹下
import os, shutil
def batchRenameFile(srcDirName, destDirName, prefix='1', ext='.jpg'):
    """Copy the files of every sub-folder of *srcDirName* into *destDirName*.

    Each copy is named ``<prefix>_<sub-folder name>_<n><ext>``, where ``n``
    counts from 1 inside each sub-folder.

    Args:
        srcDirName: Folder whose immediate sub-folders hold the data files.
        destDirName: Folder that receives the renamed copies; it is created
            when it does not exist yet.
        prefix: Leading tag of every new file name (defaults to ``'1'``,
            the value that used to be hard-coded).
        ext: Extension given to every copy (defaults to ``'.jpg'``).

    Returns:
        The destination paths, in the order the files were copied.
    """
    # shutil.copy fails when the target folder is missing, so create it first.
    os.makedirs(destDirName, exist_ok=True)

    copiedPaths = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # numbering reproducible between runs.
    for subDirName in sorted(os.listdir(srcDirName)):
        subDirPath = os.path.join(srcDirName, subDirName)
        if not os.path.isdir(subDirPath):
            # A stray file directly under srcDirName cannot be listed.
            continue

        fileNames = sorted(
            name for name in os.listdir(subDirPath)
            if os.path.isfile(os.path.join(subDirPath, name))
        )
        for index, fileName in enumerate(fileNames, start=1):
            newName = '%s_%s_%d%s' % (prefix, subDirName, index, ext)
            destPath = os.path.join(destDirName, newName)
            shutil.copy(os.path.join(subDirPath, fileName), destPath)
            print(destPath)
            copiedPaths.append(destPath)
    return copiedPaths
接着从txt文件中生成数字和标签对应的字典:
def _readLabelFile(path, keyOffset=None, encoding='utf-8-sig'):
    """Read one tab-separated list file into a ``{number: label}`` dict.

    The first field of each line is the key and the last field is the value.
    When *keyOffset* is not None the key is parsed as an integer, shifted by
    *keyOffset* and converted back to a string. Blank lines are skipped.
    """
    labels = {}
    with open(path, 'r', encoding=encoding) as labelFile:
        for rawLine in labelFile:
            line = rawLine.rstrip('\r\n')
            if not line.strip():
                continue
            fields = line.split('\t')
            key = fields[0].strip()
            if keyOffset is not None:
                key = str(int(key) + keyOffset)
            labels[key] = fields[-1]
    return labels


def creatDic(catFile='E:\\Cats&Dogs\\CatList.txt',
             dogFile='E:\\Cats&Dogs\\DogList.txt',
             dogOffset=42,
             encoding='utf-8-sig'):
    """Build the number -> label dictionary from the cat and dog list files.

    Cat entries keep the number found in *catFile* as their key. Dog entries
    are stored under ``str(number + dogOffset)`` so that both lists can share
    one dictionary.

    Args:
        catFile: Path of the tab-separated cat list.
        dogFile: Path of the tab-separated dog list.
        dogOffset: Amount added to every dog number (42 by default).
        encoding: Text encoding of both files. The default ``'utf-8-sig'``
            reads UTF-8 and drops a leading byte-order mark if present.

    Returns:
        dict mapping number strings to label strings; labels carry no
        trailing newline.

    Raises:
        ValueError: if a dog line does not start with an integer.
    """
    txtDict = _readLabelFile(catFile, None, encoding)
    txtDict.update(_readLabelFile(dogFile, dogOffset, encoding))
    return txtDict
最后批量修改 XML 标注文件:把其中的图片名(filename、path)改为与 XML 文件名对应的图片名,并把 folder 以及每个 object 的标签名称(name)改为字典中对应的标签:
def batchRenameFile1(DirName, txtDict,
                     imagePathPrefix='E:\\myVOCdevkit\\VOC2007\\images\\',
                     dogOffset=42):
    """Rewrite the image name and label stored in every XML file of *DirName*.

    For a file named ``<species>_<label>_<n>.xml`` the function sets

    * ``<filename>`` to ``<species>_<label>_<n>.jpg``,
    * ``<path>`` to *imagePathPrefix* followed by that image name,
    * ``<folder>`` and the ``<name>`` of every ``<object>`` to the label
      looked up in *txtDict*: key ``<label>`` when species is ``"1"`` (cat),
      key ``str(int(<label>) + dogOffset)`` when species is ``"2"`` (dog).

    Files with any other species tag only get ``<filename>`` and ``<path>``
    updated. Entries that do not end in ``.xml`` are ignored, and an element
    that is missing from a document is left missing. Every processed file is
    written back in place.

    Args:
        DirName: Folder containing the XML annotation files.
        txtDict: Mapping from number strings to label names.
        imagePathPrefix: Text placed in front of the image name in ``<path>``.
        dogOffset: Amount added to the label number of dog files.

    Raises:
        KeyError: if the label number of a cat or dog file is not in *txtDict*.
    """
    # Imported here so the function works even if the module header only
    # imports os and shutil.
    from xml.etree.ElementTree import parse

    def setText(node, tag, text):
        # Update the child element when present; do nothing otherwise.
        child = node.find(tag)
        if child is not None:
            child.text = text

    for FileName in sorted(os.listdir(DirName)):
        stem, extension = os.path.splitext(FileName)
        if extension.lower() != '.xml':
            continue

        FilePath = os.path.join(DirName, FileName)
        print(FilePath)
        doc = parse(FilePath)
        root = doc.getroot()

        imageName = stem + '.jpg'
        setText(root, 'filename', imageName)
        setText(root, 'path', imagePathPrefix + imageName)

        # Split the stem, not the full file name, so that a two-part name
        # such as "1_3.xml" yields the label "3" instead of "3.xml".
        parts = stem.split('_')
        labelKey = None
        if len(parts) >= 2:
            species, label = parts[0], parts[1]
            if species == '1':  # cat
                labelKey = label
            elif species == '2':  # dog
                labelKey = str(int(label) + dogOffset)

        if labelKey is not None:
            labelName = txtDict[labelKey]
            setText(root, 'folder', labelName)
            # A document may contain several <object> nodes.
            for objectNode in root.findall('object'):
                setText(objectNode, 'name', labelName)

        doc.write(FilePath)
完整源文件详见github:https://github.com/vivianLL/CatDogNames