import os
from PIL import Image
def getFilePathList(dirPath, partOfFileName=''):
    """Return the paths of the files directly inside ``dirPath``.

    Only files located in ``dirPath`` itself are considered; sub-folders are
    not searched.

    Args:
        dirPath: Folder to list.
        partOfFileName: Substring a file name must contain to be kept. The
            default ``''`` keeps every file.

    Returns:
        A list of ``os.path.join(dirPath, name)`` for each matching file.
        The list is empty when ``dirPath`` does not exist or holds no
        matching file.
    """
    # Only the first os.walk() entry (the top-level folder) is needed, so take
    # it with next() instead of walking the whole tree. The default tuple
    # covers a missing folder, for which os.walk() yields nothing.
    _, _, allFileName_list = next(os.walk(dirPath), (dirPath, [], []))
    return [
        os.path.join(dirPath, k)
        for k in allFileName_list
        if partOfFileName in k
    ]
def batchResizeImage(oldDirPath, newDirPath, height, width):
    """Resize every '.jpg' image of ``oldDirPath`` and save it in ``newDirPath``.

    Args:
        oldDirPath: Folder holding the source images (top level only).
        newDirPath: Folder receiving the resized images; created if missing.
        height: Target height in pixels.
        width: Target width in pixels.

    Each output file keeps the name of its source file.
    """
    os.makedirs(newDirPath, exist_ok=True)
    for jpgFilePath in getFilePathList(oldDirPath, '.jpg'):
        # The context manager closes the source file once it has been resized.
        with Image.open(jpgFilePath) as image:
            # PIL expects the size as (width, height). The original passed
            # (height, weight), where `weight` was an undefined name.
            # Image.LANCZOS is the same filter as the removed ANTIALIAS alias.
            resized_image = image.resize((width, height), Image.LANCZOS)
        saveFilePath = os.path.join(newDirPath, os.path.basename(jpgFilePath))
        resized_image.save(saveFilePath)
# Example run: resize everything in 'source_images' to 640x640 and store the
# results in 'train_images'.
oldDirPath = 'source_images'
newDirPath = 'train_images'
height = width = 640
batchResizeImage(
    oldDirPath=oldDirPath,
    newDirPath=newDirPath,
    height=height,
    width=width,
)
# 1.3 查:查询文件夹中的文件 (Query: list the files in a folder)
import os
def getFileNameList(dirPath, partOfFileName=''):
    """Return the names of the files directly inside ``dirPath``.

    Only files located in ``dirPath`` itself are considered; sub-folders are
    not searched.

    Args:
        dirPath: Folder to list.
        partOfFileName: Substring a file name must contain to be kept. The
            default ``''`` keeps every file.

    Returns:
        A list of bare file names (no folder part). The list is empty when
        ``dirPath`` does not exist or holds no matching file.
    """
    # Take just the first os.walk() entry rather than walking the whole tree;
    # the default tuple covers a missing folder.
    _, _, allFileName_list = next(os.walk(dirPath), (dirPath, [], []))
    return [k for k in allFileName_list if partOfFileName in k]
def getFilePathList(dirPath, partOfFileName=''):
    """Return the paths of the files directly inside ``dirPath``.

    Only files located in ``dirPath`` itself are considered; sub-folders are
    not searched.

    Args:
        dirPath: Folder to list.
        partOfFileName: Substring a file name must contain to be kept. The
            default ``''`` keeps every file.

    Returns:
        A list of ``os.path.join(dirPath, name)`` for each matching file.
        The list is empty when ``dirPath`` does not exist or holds no
        matching file.
    """
    # Take just the first os.walk() entry rather than walking the whole tree;
    # the default tuple covers a missing folder.
    _, _, allFileName_list = next(os.walk(dirPath), (dirPath, [], []))
    return [
        os.path.join(dirPath, k)
        for k in allFileName_list
        if partOfFileName in k
    ]
import os
# Takes three arguments: the folder path dirPath, the text to look for
# partOfFileContent, and the file extension suffixOfFileName.
def searchFileContent(dirPath, partOfFileContent, suffixOfFileName=''):
    """Find the files under ``dirPath`` whose text contains ``partOfFileContent``.

    The folder is searched recursively. Each matching path is printed and
    collected.

    Args:
        dirPath: Root folder; a leading ``~`` is expanded.
        partOfFileContent: Text that must appear in a file for it to match.
        suffixOfFileName: Extension to restrict the search to, with or
            without the leading dot (``'.py'`` and ``'py'`` are equivalent).
            The default ``''`` searches every file.

    Returns:
        The list of matching file paths.
    """
    dirPath = os.path.expanduser(dirPath)
    wantedSuffix = suffixOfFileName.strip('.')
    result_list = []
    for folder, _, fileName_list in os.walk(dirPath):
        for fileName in fileName_list:
            # rpartition() yields an empty separator for names without a dot,
            # so extension-less files are skipped rather than raising.
            _, dot, extension = fileName.rpartition('.')
            if wantedSuffix and (not dot or extension != wantedSuffix):
                continue
            filePath = os.path.join(folder, fileName)
            try:
                with open(filePath, encoding='utf8') as fileObj:
                    fileContent = fileObj.read()
            except UnicodeDecodeError:
                # Not UTF-8 text (e.g. a binary file): it cannot match.
                continue
            if partOfFileContent in fileContent:
                print(filePath)
                result_list.append(filePath)
    return result_list
import re
import os
rawfolder = '123'  # folder holding the source xml files
newfolder = '33333'  # folder receiving the generated xml files
os.makedirs(newfolder, exist_ok=True)
for i in os.listdir(rawfolder):
    print(i)
    # Sample output:
    # 20190720073948_000258_cc8cdaa5ee49.xml
    # 20190720073950_000257_cc8cdaa64390.xml
    # 20190720073950_000258_cc8cdaa5ee3e.xml
    with open(rawfolder + '/' + i, 'r') as r:
        content = r.readlines()
    # NOTE(review): 'w' matches any line containing the letter w; the marker
    # was presumably an XML tag lost when this snippet was pasted. Confirm.
    for c, j in enumerate(content):
        if 'w' not in j:
            continue
        print(j, '下标-》', c)  # c is the 0-based index of the matching line
        # The block to drop starts one line above the match and spans 12
        # lines. Clamp at 0 so a match on the first line does not turn into
        # a negative slice index.
        end = max(c - 1, 0)
        final = content[:end] + content[end + 12:]
        # Open the output once per match instead of once per written line;
        # append mode is kept, so the file contents are the same.
        with open(newfolder + '/' + i, 'a+') as w:
            for x in final:
                w.write(x)
                print(x)
# 2.3 检查不是people和obstacle的label (Check for labels that are neither people nor obstacle)
# Report labels that are neither "people" nor "obstacle".
import re
import os

rawfolder = '123'
for i in os.listdir(rawfolder):
    with open(rawfolder + '/' + i, 'r') as r:
        content = r.readlines()
    for j in content:
        # NOTE(review): '' in j is always true; the original marker was
        # presumably an XML tag lost when this snippet was pasted. As it
        # stands, every line without 'people' or 'obstacle' is printed.
        if '' in j and ('people' not in j and 'obstacle' not in j):
            print(j)
            print(i)
# 2.4 读取指定后缀 (Read the files with a given suffix)
import os
def get_filePathList(dirPath, partOfFileName=''):
    """Return the full paths of the files directly inside ``dirPath``.

    Args:
        dirPath: Folder to list (sub-folders are not searched).
        partOfFileName: Substring a file name must contain to be kept, e.g.
            ``'.JPG'``. The match is case-sensitive. The default ``''``
            keeps every file.

    Returns:
        A list of ``os.path.join(dirPath, name)`` for each matching file.
        The list is empty when ``dirPath`` does not exist.
    """
    # First os.walk() entry = (dirPath, sub-folders, file names). The default
    # tuple avoids StopIteration when the folder is missing.
    all_fileName_list = next(os.walk(dirPath), (dirPath, [], []))[2]
    # e.g. ['20190720072950_000256_cc8cdaa64390.JPG',
    #       '20190720074950_000259_cc8cdaa5ee3e .jpg', ...]
    # Keep only the wanted names (still without the folder part).
    fileName_list = [k for k in all_fileName_list if partOfFileName in k]
    # Prefix every kept name with the folder to obtain full paths.
    filePath_list = [os.path.join(dirPath, k) for k in fileName_list]
    return filePath_list
# Example: gather the full path of every '.JPG' file in the sample folder.
dirPath = 'C:/Users/lenovo/Desktop/lian'
a = get_filePathList(dirPath, partOfFileName='.JPG')
a  # bare expression: displays the list in an interactive session only
# print(len(a))
# 2.5 检查是否有图片漏标,并删除漏标图片 (Check for images without annotations and report them for deletion)
def delete_file(filePath):
    """Report what should happen to ``filePath``; nothing is removed from disk.

    Prints a "path does not exist" message when ``filePath`` is missing, and
    a "must be deleted manually" message otherwise.

    Args:
        filePath: Path of the file to report, e.g.
            ``'C:/Users/lenovo/Desktop/lianxi/img\\20190720072950_000256_cc8cdaa64390.JPG'``.
    """
    if not os.path.exists(filePath):
        print('%s 这个文件路径不存在,请检查一下' % filePath)
    else:
        print('%s 这个路径的文件需手动删除' % filePath)


def check_1(dirPath, suffix):
    """Report every image in ``dirPath`` that has no matching '.xml' file.

    An image and an annotation match when their paths are equal once the
    extension is removed.

    Args:
        dirPath: Folder holding both the images and the '.xml' annotations.
        suffix: Substring selecting the image files, e.g. ``'.JPG'``.
    """
    xmlFilePath_list = get_filePathList(dirPath, '.xml')
    # Paths without extension, as a set for fast membership tests.
    xmlFilePathPrefix_set = {os.path.splitext(k)[0] for k in xmlFilePath_list}
    imageFilePath_list = get_filePathList(dirPath, suffix)
    for imgFilePath in imageFilePath_list:
        # Report the real image path. The original rebuilt it with a
        # hard-coded '.JPG', which pointed at a non-existent file for any
        # other suffix.
        if os.path.splitext(imgFilePath)[0] not in xmlFilePathPrefix_set:
            delete_file(imgFilePath)
# Example: report the '.JPG' images of the sample folder that lack an xml file.
dirPath = 'C:/Users/lenovo/Desktop/lx'
check_1(dirPath, suffix='.JPG')
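# 2.6 检测标记的box是否超过图片的边界,若有则显示删除与box相关的xml文件和图片文件 (Detect boxes that exceed the image boundary and report the related xml and image files for deletion)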
import xml.etree.ElementTree as ET
from PIL import Image
def check_2(dirPath, suffix):
    """Report annotations whose bounding box does not fit inside the image.

    For every '.xml' file in ``dirPath`` the image with the same base name is
    opened and each ``object/bndbox`` is checked against the image size. On
    the first bad box of a file, both the xml and the image are passed to
    ``delete_file`` and the rest of that file is skipped. A success message
    is printed when no file had a bad box.

    Args:
        dirPath: Folder holding the '.xml' files together with their images.
        suffix: Image extension, with or without the leading dot
            (e.g. ``'.JPG'``).
    """
    xmlFilePath_list = get_filePathList(dirPath, '.xml')
    allFileCorrect = True
    for xmlFilePath in xmlFilePath_list:
        imageFilePath = os.path.splitext(xmlFilePath)[0] + '.' + suffix.strip('.')
        # Only the size is needed; the context manager closes the file.
        with Image.open(imageFilePath) as image:
            width, height = image.size
        # ET.parse honours the encoding declared in the XML file, unlike
        # open() with the platform default encoding.
        root = ET.parse(xmlFilePath).getroot()
        for object_item in root.findall('object'):
            bndbox = object_item.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            ymin = int(bndbox.find('ymin').text)
            xmax = int(bndbox.find('xmax').text)
            ymax = int(bndbox.find('ymax').text)
            # A valid box is non-empty and lies inside the image. Negative
            # minima are rejected too, since they also cross the boundary.
            boxInsideImage = (
                0 <= xmin < xmax <= width
                and 0 <= ymin < ymax <= height
            )
            if not boxInsideImage:
                delete_file(xmlFilePath)
                delete_file(imageFilePath)
                allFileCorrect = False
                break
    if allFileCorrect:
        print('祝贺你! 已经通过检验,所有xml文件中的标注框都没有越界')
# Example: the 'lx' folder holds the .xml and .JPG files side by side.
dirPath = 'C:/Users/lenovo/Desktop/lx'
# Original author's note: the suffix must be '.JPG' or left empty.
check_2(dirPath, suffix='.JPG')
# 2.7 检查xmin<0…,并修改xmin… (Check for xmin < 0 etc. and correct xmin etc.)
# coding=utf-8
import os
import shutil
import random
from xml.etree.ElementTree import ElementTree,Element
import cv2
def read_xml(in_path):
    """Read and parse an XML file.

    in_path: path of the XML file
    return: the parsed ElementTree
    """
    tree = ElementTree()
    tree.parse(in_path)
    return tree
def check(url="C:/Users/lenovo/Desktop/source/xml_sum"):
    """Print the name of every xml file in ``url`` with a suspicious annotation.

    A file name is printed when the file has no ``object`` element, when an
    object has no ``bndbox``, when a box has a non-positive or inverted
    corner (``xmin <= 0``, ``xmin >= xmax``, ``ymin <= 0``, ``ymin >= ymax``),
    or when a box exceeds the size declared in the file's ``size`` element.
    A name may be printed several times, once per finding. No output means
    every file passed.

    Args:
        url: Folder that contains only xml files. Defaults to the folder the
            original script was written for.
    """
    for item in os.listdir(url):  # item is an xml file name
        tree = read_xml(url + "/" + item)
        root = tree.getroot()
        object_list = root.findall("object")
        size = root.find("size")
        width = int(size.find("width").text)
        height = int(size.find("height").text)
        # findall() returns an empty list, never None, so test for emptiness.
        if not object_list:
            print(item)
            continue
        for it in object_list:
            bndbox = it.find("bndbox")
            if bndbox is None:
                print(item)
                # Nothing to measure; skip instead of dereferencing None.
                continue
            xmin = int(bndbox.find("xmin").text)
            xmax = int(bndbox.find("xmax").text)
            ymin = int(bndbox.find("ymin").text)
            ymax = int(bndbox.find("ymax").text)
            if xmin <= 0 or xmin >= xmax or ymin <= 0 or ymin >= ymax:
                print(item)
            if xmax > width or ymax > height:
                print(item)


if __name__ == '__main__':
    # No output means everything is fine; otherwise bare file names such as
    # 123111.xml are printed.
    check()
def get_classNameList(txtFilePath):
    """Return the sorted class names listed one per line in ``txtFilePath``.

    Surrounding whitespace is stripped from every line and blank lines are
    dropped.

    Args:
        txtFilePath: Path of a UTF-8 text file with one class name per line.

    Returns:
        The class names sorted in ascending order.
    """
    # 'utf-8-sig' reads plain UTF-8 and also drops a leading BOM, which would
    # otherwise end up glued to the first class name.
    with open(txtFilePath, 'r', encoding='utf-8-sig') as fileObj:
        fileContent = fileObj.read()
    line_list = [k.strip() for k in fileContent.split('\n') if k.strip() != '']
    className_list = sorted(line_list, reverse=False)
    return className_list
# Example: load the class names of a labelImg predefined-classes file.
txtFilePath = 'C:/Users/lenovo/Desktop/labelImg/data/predefined_classes -outofstock.txt'
get_classNameList(txtFilePath=txtFilePath)
import os
# Split a path into its folder part and its file name.
pathnoname, name = os.path.split("E:/lpthw/zedshaw/ex19.py")
print(pathnoname)
print(name)
# Add an entry to the module search path.
import sys
sys.path.append('')
# 2.9 检查trainval.txt (Check trainval.txt)
import cv2
from os import listdir
from os.path import isfile,isdir,join
# Build the image file names listed in trainval.txt (one base name per line).
trainval_list = list()
with open('./trainval.txt', 'r') as f:
    for line in f:
        line = line.strip('\n')
        a = line + '.jpg'
        trainval_list.append(a)
print(trainval_list)

# Try to load every listed image from ./img3/ and report the unreadable ones.
for i in trainval_list:
    img_path = '{}{}'.format('./img3/', i)
    img = cv2.imread(img_path)
    # cv2.imread() returns None for a missing or unreadable file. Test for
    # that explicitly instead of catching every exception with a bare except.
    if img is None:
        print('fail read:' + img_path)
        continue
    print(img.shape)