YOLO数据处理工具函数合集
changeTheLabel.py
coco_extract.py
count.py
delete.py
divide.py
examine.py
label_make.py
rename.py
xmlToTxt.py
changeTheLabel.py
import os
# Folder holding the YOLO label (.txt) files whose class id is rewritten.
folder_path = r'C:\Users\User\Desktop\coco\transfer\cell_phone\label'


def relabel_line(line, new_class_id='1'):
    """Return *line* with its first field replaced by *new_class_id*.

    Fields are split on any whitespace, so tab-separated labels keep their
    coordinates. A blank line yields an empty string (it is dropped) instead
    of being turned into a label that has a class id but no coordinates.
    """
    fields = line.split()
    if not fields:
        return ''
    fields[0] = new_class_id
    return ' '.join(fields) + '\n'


def relabel_folder(folder, new_class_id='1'):
    """Rewrite the class id of every label line in each ``.txt`` file of *folder*.

    Files are modified in place; entries not ending in ``.txt`` are ignored.
    """
    for filename in os.listdir(folder):
        if not filename.endswith('.txt'):
            continue
        file_path = os.path.join(folder, filename)
        with open(file_path, 'r') as file:
            modified_lines = [relabel_line(line, new_class_id) for line in file]
        with open(file_path, 'w') as file:
            file.writelines(modified_lines)


if __name__ == '__main__':
    relabel_folder(folder_path)
    print("修改完成!")
coco_extract.py
from pycocotools. coco import COCO
import os
import shutil
from tqdm import tqdm
import skimage. io as io
import matplotlib. pyplot as plt
import cv2
from PIL import Image, ImageDraw
# Name of the single COCO category to extract; it also names the output folder.
pathset = 'wine glass'

# Reference list of candidate category names (translated from the original note).
# The numbers are copied verbatim from the original note; presumably they are
# per-category counts - TODO confirm.
#   traffic: 'person', 'bicycle', 'car', 'motorcycle', 'bus', 'train', 'truck'
#            257249 7056 43532 8654 6061 4570 9970
#   objects: 'bottle', 'wine glass', 'cup', 'bowl', 'fork', 'knife', 'spoon'
#            24070 7839 20574 14323
#   phone:   'cell phone'

# Output root for the extracted subset, with its image and annotation folders.
savepath = f"C:/Users/User/Desktop/coco/transfer/{pathset}/"
img_dir = f"{savepath}images/"
anno_dir = f"{savepath}annotations/"

# COCO splits to process and the category names to keep.
datasets_list = ['train2017', 'val2017']
classes_names = [pathset]

# Expected layout of the COCO root directory:
#   $COCO_PATH
#   ----|annotations
#   ----|train2017
#   ----|val2017
#   ----|test2017
dataDir = 'C:/Users/User/Desktop/coco/'
# Pascal VOC style annotation templates.
# The element tags are required: the companion xmlToTxt.py script parses these
# files with ElementTree and looks up 'object', 'name', 'bndbox', 'xmin',
# 'xmax', 'ymin' and 'ymax'.

# Document header; placeholders: file name, image width, height, depth.
headstr = """\
<annotation>
    <folder>VOC</folder>
    <filename>%s</filename>
    <source>
        <database>My Database</database>
        <annotation>COCO</annotation>
        <image>flickr</image>
        <flickrid>NULL</flickrid>
    </source>
    <owner>
        <flickrid>NULL</flickrid>
        <name>company</name>
    </owner>
    <size>
        <width>%d</width>
        <height>%d</height>
        <depth>%d</depth>
    </size>
    <segmented>0</segmented>
"""

# One object entry; placeholders: class name, xmin, ymin, xmax, ymax.
objstr = """\
    <object>
        <name>%s</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>%d</xmin>
            <ymin>%d</ymin>
            <xmax>%d</xmax>
            <ymax>%d</ymax>
        </bndbox>
    </object>
"""

# Closes the root element opened by headstr.
tailstr = '''\
</annotation>
'''
def mkr(path):
    """Create directory *path*, including missing parents, if it is absent.

    ``exist_ok=True`` replaces the separate exists-check so there is no window
    between testing for the directory and creating it.
    """
    os.makedirs(path, exist_ok=True)
def id2name(coco):
    """Build a ``{category id: category name}`` dict.

    The mapping is read from the ``'categories'`` list of ``coco.dataset``.
    """
    return {
        category['id']: category['name']
        for category in coco.dataset['categories']
    }
def write_xml(anno_path, head, objs, tail):
    """Write one annotation file: *head*, one ``objstr`` entry per object, *tail*.

    Args:
        anno_path: Destination file path; an existing file is overwritten.
        head: Already formatted header text.
        objs: Iterable of ``[class_name, xmin, ymin, xmax, ymax]`` entries.
        tail: Closing text.
    """
    # The context manager guarantees the handle is flushed and closed.
    # UTF-8 matches the encoding xmlToTxt.py uses to read these files back.
    with open(anno_path, "w", encoding="utf-8") as f:
        f.write(head)
        for obj in objs:
            f.write(objstr % (obj[0], obj[1], obj[2], obj[3], obj[4]))
        f.write(tail)
def save_annotations_and_imgs(coco, dataset, filename, objs):
    """Copy one image into the output tree and write its annotation file.

    Args:
        coco: Unused; kept so existing call sites keep working.
        dataset: Split name (e.g. ``'train2017'``), used as the sub-folder
            under ``dataDir``, ``img_dir`` and ``anno_dir``.
        filename: Image file name inside the split folder.
        objs: ``[class_name, xmin, ymin, xmax, ymax]`` entries for the image.

    Raises:
        OSError: If the image cannot be read.
    """
    dst_anno_dir = os.path.join(anno_dir, dataset)
    mkr(dst_anno_dir)
    dst_img_dir = os.path.join(img_dir, dataset)
    mkr(dst_img_dir)

    # splitext works for any extension length ('.jpg', '.jpeg', ...), unlike
    # slicing off the last three characters.
    stem = os.path.splitext(filename)[0]
    anno_path = dst_anno_dir + '/' + stem + '.xml'
    img_path = dataDir + dataset + '/' + filename
    dst_imgpath = dst_img_dir + '/' + filename

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise OSError(f"Cannot read image: {img_path}")
    shutil.copy(img_path, dst_imgpath)

    height, width = img.shape[0], img.shape[1]
    depth = img.shape[2] if img.ndim == 3 else 1
    head = headstr % (filename, width, height, depth)
    write_xml(anno_path, head, objs, tailstr)
def showimg(coco, dataset, img, classes, cls_id, show=True):
    """Collect the wanted bounding boxes of one image and optionally display them.

    Args:
        coco: Annotation index providing ``getAnnIds`` and ``loadAnns``.
        dataset: Split name, used to locate the image under ``dataDir``.
        img: Image record with at least ``'id'`` and ``'file_name'``.
        classes: Mapping from category id to category name.
        cls_id: Category id(s) used to filter the annotations.
        show: When true, draw the boxes on the image and display it.

    Returns:
        A list of ``[class_name, xmin, ymin, xmax, ymax]`` entries with
        integer pixel coordinates, for annotations whose class name is in
        ``classes_names``.
    """
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=cls_id, iscrowd=None)
    anns = coco.loadAnns(annIds)

    objs = []
    for ann in anns:
        class_name = classes[ann['category_id']]
        if class_name not in classes_names or 'bbox' not in ann:
            continue
        # bbox is [x, y, width, height]; convert to corner coordinates.
        bbox = ann['bbox']
        xmin = int(bbox[0])
        ymin = int(bbox[1])
        xmax = int(bbox[2] + bbox[0])
        ymax = int(bbox[3] + bbox[1])
        objs.append([class_name, xmin, ymin, xmax, ymax])

    if show:
        # The image is only needed for display, so it is opened only here, and
        # the context manager releases the file handle afterwards.
        with Image.open('%s/%s/%s' % (dataDir, dataset, img['file_name'])) as canvas:
            draw = ImageDraw.Draw(canvas)
            for _, xmin, ymin, xmax, ymax in objs:
                draw.rectangle([xmin, ymin, xmax, ymax])
            plt.figure()
            plt.axis('off')
            plt.imshow(canvas)
            plt.show()
    return objs
# For each split: load its instance annotations, then export every image that
# contains one of the wanted categories, together with its annotation file.
for dataset in datasets_list:
    annotation_json = '{}/annotations/instances_{}.json'.format(dataDir, dataset)
    coco = COCO(annotation_json)
    classes = id2name(coco)
    # Ids of all wanted categories; used to filter annotations per image.
    classes_ids = coco.getCatIds(catNms=classes_names)
    for wanted_name in classes_names:
        matching_ids = coco.getCatIds(catNms=[wanted_name])
        for image_id in tqdm(coco.getImgIds(catIds=matching_ids)):
            image_info = coco.loadImgs(image_id)[0]
            boxes = showimg(coco, dataset, image_info, classes, classes_ids, show=False)
            save_annotations_and_imgs(coco, dataset, image_info['file_name'], boxes)
count.py
import os
def count_files_in_folder(folder_path):
    """Count the regular files under *folder_path*, including sub-folders.

    Returns:
        The number of files, or ``None`` if any folder could not be processed
        (the error is printed).
    """
    try:
        file_count = 0
        for item in os.listdir(folder_path):
            item_path = os.path.join(folder_path, item)
            if os.path.isfile(item_path):
                file_count += 1
            elif os.path.isdir(item_path):
                nested = count_files_in_folder(item_path)
                if nested is None:
                    # The nested call already printed the real error. Pass the
                    # failure up instead of adding None to an int, which would
                    # print a misleading TypeError message.
                    return None
                file_count += nested
        return file_count
    except Exception as e:
        # Deliberately broad: callers rely on None rather than an exception.
        print(f"Error counting files: {e} ")
        return None
# Folder whose files are counted, sub-folders included.
folder_path = "/home/ws/CoodWorkRun/Database/smoDB_phoDB_glaDB_faceDB/JPEGImages"

result = count_files_in_folder(folder_path)
# A None result means counting failed and the error was already printed.
if result is not None:
    print(f"Number of files in {folder_path} : {result} ")
delete.py
import os
# Folders whose contents are compared by file name without extension.
image_folder = r'C:\Users\User\Desktop\coco\transfer\bottle\JPEGImages'
label_folder = r'C:\Users\User\Desktop\coco\transfer\bottle\annotations'

# Base names present in each folder.
image_files = {os.path.splitext(entry)[0] for entry in os.listdir(image_folder)}
label_files = {os.path.splitext(entry)[0] for entry in os.listdir(label_folder)}
for stems in (image_files, label_files):
    print(stems)
    print(len(stems))

# Names found in only one of the two folders. This script only reports them;
# it does not remove any file.
files_to_delete = image_files ^ label_files
print('删除文件夹:')
print(files_to_delete)
print('删除数:')
print(len(files_to_delete))
divide.py
import os
import random
import shutil
# Dataset root; it must contain 'JPEGImages' (images) and 'label' (YOLO .txt files).
data_dir = r"D:\Database\Database地铁\smoDB_phoDB_glaDB_faceDB_v2/"


def split_dataset(data_dir, train_ratio=0.9, seed=None):
    """Randomly split an image/label dataset into train and validation sets.

    Images come from ``<data_dir>/JPEGImages`` and labels from
    ``<data_dir>/label``. Each pair is copied to ``<data_dir>/train`` or
    ``<data_dir>/val`` (sub-folders ``images`` and ``labels``), and
    ``train.txt`` / ``val.txt`` listing the relative image paths are written
    to *data_dir*.

    Args:
        data_dir: Dataset root directory.
        train_ratio: Fraction of images assigned to the training set.
        seed: Optional seed for a reproducible shuffle.

    Returns:
        ``(num_train, num_val)``.

    Raises:
        FileNotFoundError: If an image has no label file with the same stem.
    """
    images_dir = os.path.join(data_dir, "JPEGImages")
    labels_dir = os.path.join(data_dir, "label")

    # Target folders are derived from data_dir. The original hard-coded them
    # in non-raw strings containing the invalid escapes '\D' and '\s'.
    targets = {}
    for split in ("train", "val"):
        split_images = os.path.join(data_dir, split, "images")
        split_labels = os.path.join(data_dir, split, "labels")
        os.makedirs(split_images, exist_ok=True)
        os.makedirs(split_labels, exist_ok=True)
        targets[split] = (split_images, split_labels)

    # Sort first so that a given seed always produces the same split.
    image_files = sorted(os.listdir(images_dir))
    random.Random(seed).shuffle(image_files)
    total_images = len(image_files)
    num_train = int(total_images * train_ratio)

    listings = {"train": [], "val": []}
    for i, image_file in enumerate(image_files):
        # Swap the real extension for '.txt'. replace(".jpg", ".txt") left
        # other extensions unchanged, so the image itself was copied into the
        # labels folder.
        label_file = os.path.splitext(image_file)[0] + ".txt"
        split = "train" if i < num_train else "val"
        split_images, split_labels = targets[split]
        shutil.copy(os.path.join(images_dir, image_file), os.path.join(split_images, image_file))
        shutil.copy(os.path.join(labels_dir, label_file), os.path.join(split_labels, label_file))
        listings[split].append(os.path.join(split, "images", image_file))

    for split, entries in listings.items():
        with open(os.path.join(data_dir, split + ".txt"), "w") as listing_file:
            listing_file.write("\n".join(entries))

    return num_train, total_images - num_train


if __name__ == "__main__":
    num_train, num_val = split_dataset(data_dir)
    print(f"划分完成,训练集包含 {num_train} 个样本,验证集包含 {num_val} 个样本。")
examine.py
import os
# Folder whose .txt files are checked for zero size.
folder_path = "/home/ws/CoodWorkRun/Database/smoDB_phoDB_glaDB/label"

# Names of the .txt files that are completely empty.
empty_files = []
for entry in os.listdir(folder_path):
    if not entry.endswith(".txt"):
        continue
    if os.path.getsize(os.path.join(folder_path, entry)) != 0:
        continue
    empty_files.append(entry)
    print("Empty file:", entry)

num_empty_folders = len(empty_files)
print("Number of empty files:", num_empty_folders)
label_make.py
from PIL import Image, ImageDraw
# Bounding-box list, output label folder and image folder.
anno_box_path = r"/home/ws/CoodWorkRun/Database/人脸数据集/CelebA/Anno/list_bbox_celeba.txt"
label_dir = "/home/ws/CoodWorkRun/Database/人脸数据集/CelebA/label"
img_dir = "/home/ws/CoodWorkRun/Database/人脸数据集/CelebA/Img/img_celeba.7z/img_celeba"


def bbox_to_yolo(x1, y1, w, h, img_w, img_h):
    """Convert a top-left/size pixel box into normalised YOLO values.

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the image
        width or height.
    """
    x2, y2 = x1 + w, y1 + h
    dw = 1. / int(img_w)
    dh = 1. / int(img_h)
    # NOTE(review): the "- 1" is kept from the original code; it presumably
    # treats the pixel coordinates as 1-based - confirm against the dataset.
    x = ((x1 + x2) / 2.0 - 1) * dw
    y = ((y1 + y2) / 2.0 - 1) * dh
    return x, y, (x2 - x1) * dw, (y2 - y1) * dh


def make_labels(anno_path, image_dir, out_dir):
    """Write one YOLO label file per entry of the bounding-box list.

    The first two lines of *anno_path* are skipped as header lines. Every
    other line is expected to hold ``<image name> <x> <y> <w> <h>``. Each
    label file is named after the image stem and contains one class-0 box.
    """
    import os  # local import: this script's import block does not provide os

    with open(anno_path, "r") as box_file:
        for line_no, line in enumerate(box_file):
            if line_no < 2:
                continue
            print(line)
            img_strs = line.split()
            if len(img_strs) < 5:
                # Blank or truncated line (e.g. at end of file).
                continue
            x1, y1, w, h = (int(value) for value in img_strs[1:5])
            # Paths are joined explicitly. The original padded f-strings put
            # spaces around the separators, giving names that do not exist.
            with Image.open(os.path.join(image_dir, img_strs[0])) as img:
                img_w, img_h = img.size
            x, y, w, h = bbox_to_yolo(x1, y1, w, h, img_w, img_h)
            # Use the real stem rather than the first six characters of the line.
            imgname = os.path.splitext(img_strs[0])[0]
            with open(os.path.join(out_dir, imgname + ".txt"), "w") as label_txt:
                label_txt.write(f"0 {x} {y} {w} {h}\n")


if __name__ == "__main__":
    make_labels(anno_box_path, img_dir, label_dir)
rename.py
import os
def rename_files(folder_path_images, folder_path_labels, prefix="phoneB_"):
    """Add *prefix* to every image and to the label file with the same stem.

    Images keep their extension; labels are renamed to ``<prefix><stem>.txt``.
    All pairs are checked before anything is renamed, so a mismatch between
    the folders leaves both untouched. Errors are printed, not raised.

    Args:
        folder_path_images: Folder containing the image files.
        folder_path_labels: Folder containing the label files.
        prefix: Text put in front of each file name.
    """
    try:
        image_files = sorted(os.listdir(folder_path_images))
        label_files = sorted(os.listdir(folder_path_labels))
        if len(image_files) != len(label_files):
            print("Error: The number of files in the two folders does not match.")
            return

        # Pair by stem rather than by sorted position: equal counts do not
        # guarantee that the n-th image belongs to the n-th label.
        labels_by_stem = {os.path.splitext(name)[0]: name for name in label_files}
        renames = []
        seen_stems = set()
        for image_file in image_files:
            image_name, image_ext = os.path.splitext(image_file)
            label_file = labels_by_stem.get(image_name)
            if label_file is None or image_name in seen_stems:
                print(f"Error: no unique label file for image '{image_file}'.")
                return
            seen_stems.add(image_name)
            new_name = f"{prefix}{image_name}"
            renames.append((
                os.path.join(folder_path_images, image_file),
                os.path.join(folder_path_images, f"{new_name}{image_ext}"),
                os.path.join(folder_path_labels, label_file),
                os.path.join(folder_path_labels, f"{new_name}.txt"),
            ))

        for image_old_path, image_new_path, label_old_path, label_new_path in renames:
            os.rename(image_old_path, image_new_path)
            os.rename(label_old_path, label_new_path)
        print("Files renamed successfully.")
    except OSError as e:
        print(f"Error: {e} ")
# Image folder and label folder whose files are renamed together.
folder_path_images = r'C:\Users\User\Desktop\coco\transfer\cell_phone\JPEGImages'
folder_path_labels = r'C:\Users\User\Desktop\coco\transfer\cell_phone\label'

rename_files(
    folder_path_images=folder_path_images,
    folder_path_labels=folder_path_labels,
)
xmlToTxt.py
import xml. etree. ElementTree as ET
from os import listdir, getcwd
import glob
import cv2
# Class names to export; the position in this list is the YOLO class id.
classes = ["cell phone"]


def convert(size, box):
    """Convert a corner box into normalised YOLO centre/size strings.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels.

    Returns:
        ``(x_center, y_center, width, height)`` formatted with six decimals.
        A zero image dimension yields zeros on that axis instead of a
        division by zero.
    """
    # Both scales are set independently. The original assigned the zero-height
    # case to dw, which left dh undefined and overwrote the width scale.
    dw = 1.0 / size[0] if size[0] != 0 else 0
    dh = 1.0 / size[1] if size[1] != 0 else 0
    x = (box[0] + box[1]) / 2.0 * dw
    y = (box[2] + box[3]) / 2.0 * dh
    w = (box[1] - box[0]) * dw
    h = (box[3] - box[2]) * dh
    return ('%.6f' % x, '%.6f' % y, '%.6f' % w, '%.6f' % h)
def convert_annotation(image_name, image_path):
    """Convert the XML annotation of one image into a YOLO label file.

    The annotation is read from the ``annotations`` folder and the label is
    written to the ``label`` folder, both named after the image stem. Only
    objects whose name is in ``classes`` are exported. Box coordinates are
    normalised by the size of the image on disk.

    Args:
        image_name: File name of the image (used to derive the stem).
        image_path: Full path of the image file.

    Raises:
        OSError: If the image cannot be read.
    """
    import os  # local import: this script only imports listdir/getcwd from os

    print(f"Processing {image_name} ")
    # splitext handles any extension length, unlike slicing off three characters.
    stem = os.path.splitext(image_name)[0]
    xml_path = r'C:\Users\User\Desktop\coco\transfer\cell_phone\annotations/' + stem + '.xml'
    txt_path = r'C:\Users\User\Desktop\coco\transfer\cell_phone/label/' + stem + '.txt'

    with open(xml_path, encoding="utf8") as f:
        root = ET.fromstring(f.read())

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise OSError(f"Cannot read image: {image_path}")
    h = int(img.shape[0])
    w = int(img.shape[1])

    # The label file is opened only after the inputs were read successfully,
    # so a bad annotation or image does not leave an empty label behind.
    with open(txt_path, 'w') as out_file:
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls not in classes:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
# Current working directory, captured at import time.
wd = getcwd()

if __name__ == '__main__':
    # Convert the annotation of every .jpg image in the JPEGImages folder.
    jpg_pattern = r"C:\Users\User\Desktop\coco\transfer\cell_phone/JPEGImages/*.jpg"
    for image_path in glob.glob(jpg_pattern):
        # Keep only the part after the last backslash as the file name.
        image_name = image_path.rsplit('\\', 1)[-1]
        convert_annotation(image_name, image_path)
    print('完成!')