在选择改进实验数据集时需要注意的一些关键点:
至于是选择公开数据集还是自制数据集,取决于您自身的条件,如果有条件(无人机等工具)那完全可以自己制作数据集。总之,在改进 YOLO 模型之前,确保你的数据集经过充分的筛选和准备,以获得最佳的结果。
划分训练集、验证集和测试集的比例没有一个固定的标准,取决于数据集的大小和可用样本数量,不过要确保数据的随机性,避免数据的偏斜或重复。通常情况下,常见的比例是将数据集划分为训练集、验证集和测试集三部分。
训练集是用来训练模型的主要数据集。模型通过训练集学习数据的模式和特征,并调整参数来最小化预测误差。训练集应具有代表性,以涵盖数据的各种变化和情况。
验证集用于调整模型的超参数和进行模型选择。超参数是在模型训练之前设置的参数,如学习率、正则化强度等。通过使用验证集,在不同超参数设置下评估模型性能,可以选择最佳的超参数组合,以提高模型的泛化能力。
测试集用于评估最终模型的性能。模型在训练和验证期间没有接触到测试集数据,因此测试集提供了一个独立的评估指标,反映了模型在真实场景中的表现。测试集应该是隐藏的,模型在训练过程中不能使用测试集进行调整。
并且比例的选择如下:
所以,比例可以是7:2:1,6:2:2,8:1:1等情况。
以YOLOv8训练为例,图像文件存放在 `images` 文件夹中,txt 标签文件存放在 `labels` 文件夹中,使用以下代码对其进行划分:
- mydata
  - images
    - 1.jpg
    - 2.jpg
    - ...
  - labels
    - 1.txt
    - 2.txt
    - ...
import argparse
import glob
from pathlib import Path
import random
import shutil
import os
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
# Worker-thread count: leave one core free, but use at least 1 and at most 8.
# os.cpu_count() may return None when the core count cannot be determined,
# so fall back to 1 instead of failing on `None - 1`.
NUM_THREADS = min(8, max(1, (os.cpu_count() or 1) - 1))


def run(func, this_iter, desc="Processing"):
    """Apply ``func`` to every item of ``this_iter`` on a thread pool.

    A tqdm progress bar labelled ``desc`` tracks completion.

    Args:
        func: Callable taking a single item.
        this_iter: Items to process. Materialised into a list so that
            generators (which have no ``len()``) are accepted as well.
        desc: Label shown on the progress bar.

    Returns:
        A list with one result per item, in input order.
    """
    items = list(this_iter)
    with ThreadPoolExecutor(max_workers=NUM_THREADS, thread_name_prefix='MyThread') as executor:
        results = list(
            tqdm(executor.map(func, items), total=len(items), desc=desc)
        )
    return results
def split_dataset_into_train_val_test(
    dataset_dir,
    save_dir,
    train_ratio=0.7,
    val_ratio=0.2,
    test_ratio=0.1,
    im_suffix='jpg'
):
    """Randomly split a YOLO dataset into train / val / test folders.

    Images are read from ``dataset_dir/images`` and their labels from
    ``dataset_dir/labels``. Each split is written to
    ``save_dir/<split>/images`` and ``save_dir/<split>/labels``.

    Args:
        dataset_dir: Dataset root (str or Path).
        save_dir: Output root for the ``train``, ``val`` and ``test`` folders.
        train_ratio: Fraction of the images assigned to the train split.
        val_ratio: Fraction of the images assigned to the val split.
        test_ratio: Not used in the computation; the test split receives
            whatever remains after train and val. Kept for interface
            compatibility.
        im_suffix: One image extension (e.g. ``'jpg'``) or an iterable of
            extensions, without the leading dot.
    """
    import math  # local import: only needed for the tolerant ratio check

    dataset_dir = Path(dataset_dir)

    # A bare string would otherwise be iterated character by character
    # ('j', 'p', 'g') and match nothing useful.
    suffixes = [im_suffix] if isinstance(im_suffix, str) else list(im_suffix)

    image_files = []
    for suffix in suffixes:
        image_files += glob.glob(str(dataset_dir / 'images' / f"*.{suffix}"))

    total_images = len(image_files)
    random.shuffle(image_files)

    train_split = int(total_images * train_ratio)
    val_split = int(total_images * val_ratio)

    train_images = image_files[:train_split]
    # Floating-point sums such as 0.9 + 0.1 are not guaranteed to equal 1
    # exactly, so compare with a tolerance.
    if math.isclose(train_ratio + val_ratio, 1.0):
        # Two-way split: everything after the train part goes to val.
        val_images = image_files[train_split:]
        test_images = []
    else:
        val_images = image_files[train_split : train_split + val_split]
        test_images = image_files[train_split + val_split :]

    print('*'*25)
    print(
        "",
        f"Total images: {total_images}\n",
        f"Train images: {len(train_images)}\n",
        f"Val images: {len(val_images)}\n",
        f"Test images: {len(test_images)}"
    )
    print('*'*25)

    split_paths = [("train", train_images), ("val", val_images), ("test", test_images)]
    for split_name, images in split_paths:
        split_dir = Path(save_dir) / split_name
        for dir_name in ['images', 'labels']:
            (split_dir / dir_name).mkdir(exist_ok=True, parents=True)

        # process_image takes one tuple per image because run() maps a
        # single-argument callable.
        args_list = [(image, dataset_dir, split_dir) for image in images]
        run(process_image, args_list, desc=f"Creating {split_name} dataset")
        print(f"Created {split_name} dataset with {len(images)} images.")
def process_image(args):
    """Copy one image and its label file into a split folder.

    Args:
        args: Tuple ``(image_file, dataset_dir, split_dir)`` where
            ``dataset_dir`` and ``split_dir`` are ``Path`` objects. The label
            is looked up as ``dataset_dir/labels/<image stem>.txt``.

    Raises:
        AssertionError: If the label file does not exist.

    Nothing is copied when ``has_objects`` reports no objects for the label
    file.
    """
    image_file, dataset_dir, split_dir = args
    image_path = Path(image_file)
    label_name = f"{image_path.stem}.txt"
    annotation_file = dataset_dir / 'labels' / label_name
    assert annotation_file.exists(), f'{annotation_file} 不存在!'

    if has_objects(annotation_file):
        shutil.copy(image_file, split_dir / "images" / image_path.name)
        shutil.copy(annotation_file, split_dir / "labels" / label_name)
def has_objects(annotation_path):
    """Return True if the label file contains at least one annotation line.

    Blank or whitespace-only lines do not count, so a file holding only
    newlines is treated the same as an empty file.

    Args:
        annotation_path: Path of the YOLO txt label file.
    """
    with open(annotation_path, "r") as f:
        return any(line.strip() for line in f)
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Dataset root; the split function reads <data>/images and <data>/labels.
    parser.add_argument('--data', default='./data')
    # Output root for the generated train/val/test folders.
    parser.add_argument('--save', default='./mydata')
    # nargs='+' makes a value given on the command line a list of suffixes,
    # like the default, instead of a single string.
    parser.add_argument('--images_suffix', nargs='+', default=['jpg', 'png', 'jpeg'], help='images suffix')
    opt = parser.parse_args()

    split_dataset_into_train_val_test(
        dataset_dir=opt.data,
        save_dir=opt.save,
        train_ratio=0.7,
        val_ratio=0.2,
        im_suffix=opt.images_suffix
    )
您的数据集存放格式可以如下所示:
- mydata
  - Annotations
  - images
运行脚本后会生成labels文件夹(用于存放txt文件)和classes.txt文件(记录种类)
- mydata
  - Annotations
  - images
  - labels
  - classes.txt
import glob
import os
import re
import xml.etree.ElementTree as ET
from pathlib import Path
import cv2
import numpy as np
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
# Worker-thread count: leave one core free, but use at least 1 and at most 8.
# os.cpu_count() may return None when the core count cannot be determined,
# so fall back to 1 instead of failing on `None - 1`.
NUM_THREADS = min(8, max(1, (os.cpu_count() or 1) - 1))


def run(func, this_iter, desc="Processing"):
    """Apply ``func`` to every item of ``this_iter`` on a thread pool.

    A tqdm progress bar labelled ``desc`` tracks completion.

    Args:
        func: Callable taking a single item.
        this_iter: Items to process. Materialised into a list so that
            generators (which have no ``len()``) are accepted as well.
        desc: Label shown on the progress bar.

    Returns:
        A list with one result per item, in input order.
    """
    items = list(this_iter)
    with ThreadPoolExecutor(max_workers=NUM_THREADS, thread_name_prefix='MyThread') as executor:
        results = list(
            tqdm(executor.map(func, items), total=len(items), desc=desc)
        )
    return results
# Convert XML (corner) box coordinates to YOLO (normalised centre) format
def convert(size, box):
    """Turn a pixel corner box into a normalised centre/size box.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels.

    Returns:
        Tuple ``(x_center, y_center, width, height)``, each divided by the
        corresponding image dimension.
    """
    scale_x = 1.0 / size[0]
    scale_y = 1.0 / size[1]

    x_min, x_max = box[0], box[1]
    y_min, y_max = box[2], box[3]

    center_x = (x_min + x_max) / 2.0
    center_y = (y_min + y_max) / 2.0
    span_x = x_max - x_min
    span_y = y_max - y_min

    return (
        center_x * scale_x,
        center_y * scale_y,
        span_x * scale_x,
        span_y * scale_y,
    )
import threading

# Serialises the check-then-append on the shared class list. This function is
# dispatched through a thread pool, and an unguarded "not in" / "append" pair
# can record the same class twice, which would shift the line numbers in
# classes.txt relative to the class ids.
_XML_CLASSES_LOCK = threading.Lock()


def get_xml_classes(xml_path):
    """Collect the class names that appear in one VOC XML file.

    Every ``<object>/<name>`` value not seen before is appended to the
    module-level list ``xml_classes`` and written as one line to the
    module-level open file ``classes_file``.

    Args:
        xml_path: Path of the XML annotation file.
    """
    # ET.parse reads the file as bytes and honours the encoding declared in
    # the XML itself, instead of decoding with the platform default.
    root = ET.parse(xml_path).getroot()
    for obj in root.iter("object"):
        cls = obj.findtext("name")
        if not cls:
            # Missing or empty <name>: nothing to register.
            continue
        with _XML_CLASSES_LOCK:
            if cls not in xml_classes:
                classes_file.write(cls + "\n")
                xml_classes.append(cls)
# Annotation file format conversion (VOC XML -> YOLO txt)
def convert_xml2yolo(img_path):
    """Write the YOLO label file for one image from its VOC XML annotation.

    The XML is looked up in the module-level ``xml_target_path`` folder and
    the txt file is written to the module-level ``save_path`` folder, both
    under the image's stem. Class ids are the positions of the class names in
    the module-level list ``xml_classes``; objects whose class is not in that
    list are printed and skipped.

    Args:
        img_path: Path of the image file.
    """
    img_path = Path(img_path)
    xml_path = Path(xml_target_path) / img_path.with_suffix(".xml").name
    txt_path = Path(save_path) / img_path.with_suffix(".txt").name

    if not xml_path.exists():
        print(f"{xml_path}不存在!")
        return

    root = ET.parse(xml_path).getroot()
    size = root.find("size")
    w = int(size.find("width").text)
    h = int(size.find("height").text)
    if w == 0 or h == 0:
        # The XML carries no usable size, so take it from the image itself.
        img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), 1)
        if img is None:
            # Image could not be decoded: no size available, nothing to write.
            print(img_path)
            return
        h, w, _ = img.shape

    label_lines = []
    for obj in root.iter("object"):
        cls = obj.find("name").text
        if cls not in xml_classes:
            print(cls)
            continue
        cls_id = xml_classes.index(cls)
        xmlbox = obj.find("bndbox")
        b = (
            float(xmlbox.find("xmin").text),
            float(xmlbox.find("xmax").text),
            float(xmlbox.find("ymin").text),
            float(xmlbox.find("ymax").text),
        )
        try:
            bbox = convert((w, h), b)
        except ZeroDivisionError:
            # Skip this object; writing would otherwise use an unbound or
            # stale bbox from a previous object.
            print(img_path)
            continue
        label_lines.append(str(cls_id) + " " + " ".join([str(a) for a in bbox]) + "\n")

    # Write once at the end so the handle is always closed and no partially
    # written label file is left behind by a parsing error.
    with open(txt_path, "w") as out_file:
        out_file.writelines(label_lines)
if __name__ == "__main__":
    # Forward slashes work on every platform; the previous backslash-separated
    # literals were only treated as nested folders on Windows.
    xml_target_path = "data/Annotations"  # folder holding the XML files
    save_path = "data/labels"  # folder for the converted txt files
    images_path = "data/images"  # image folder

    # -------------------------------------------- #
    # Step 1: collect every class used in the XML files
    # -------------------------------------------- #
    # Check before creating classes.txt so a missing folder does not leave an
    # empty classes.txt behind.
    assert (Path(xml_target_path)).exists(), "Annotations文件夹不存在"
    xml_classes = []
    xml_list = glob.glob(os.path.join(xml_target_path, "*.[x][m][l]*"))
    # classes_file is read as a module-level name by get_xml_classes; the
    # with-block guarantees it is flushed and closed once step 1 is done.
    with open(Path(xml_target_path).parents[0] / "classes.txt", "w") as classes_file:
        run(get_xml_classes, xml_list)
    print(Path(xml_target_path).parents[0])
    print(xml_classes)

    # -------------------------------------------- #
    # Step 2: convert to YOLO txt
    # -------------------------------------------- #
    Path(save_path).mkdir(parents=True, exist_ok=True)
    # NOTE(review): this pattern matches *.jpg / *.png style names but not
    # *.jpeg - confirm whether .jpeg images should be included.
    file_list = glob.glob(os.path.join(images_path, "*.[jp][pn][gg]*"))
    run(convert_xml2yolo, file_list)
您的数据集存放格式可以如下所示:
- mydata
  - images
  - labels
运行脚本后会生成Annotations文件夹(用于存放xml文件)
- mydata
  - Annotations
  - images
  - labels
import glob
from pathlib import Path
from xml.dom.minidom import Document
import os
import cv2
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
# Worker-thread count: leave one core free, but use at least 1 and at most 8.
# os.cpu_count() may return None when the core count cannot be determined,
# so fall back to 1 instead of failing on `None - 1`.
NUM_THREADS = min(8, max(1, (os.cpu_count() or 1) - 1))


def run(func, this_iter, desc="Processing"):
    """Apply ``func`` to every item of ``this_iter`` on a thread pool.

    A tqdm progress bar labelled ``desc`` tracks completion.

    Args:
        func: Callable taking a single item.
        this_iter: Items to process. Materialised into a list so that
            generators (which have no ``len()``) are accepted as well.
        desc: Label shown on the progress bar.

    Returns:
        A list with one result per item, in input order.
    """
    items = list(this_iter)
    with ThreadPoolExecutor(max_workers=NUM_THREADS, thread_name_prefix='MyThread') as executor:
        results = list(
            tqdm(executor.map(func, items), total=len(items), desc=desc)
        )
    return results
def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def makexml(file_name):
    """Convert one YOLO txt label file into a VOC-style XML file.

    The label is read from the module-level ``txtPath`` folder, the matching
    ``<stem>.jpg`` image from ``picPath`` (for its size), and the XML is
    written to ``xmlPath``. Class ids are translated through the module-level
    ``dic`` mapping and ``folder_name`` fills the ``<folder>`` tag.

    Any failure is reported on stdout together with the file name and the
    file is skipped, so one bad label does not stop the whole batch.

    Args:
        file_name: Path of the txt label file; only its base name is used.
    """
    try:
        name = Path(file_name).name
        stem = Path(file_name).stem

        with open(os.path.join(txtPath, name)) as txt_file:
            txt_lines = txt_file.readlines()

        img_file = os.path.join(picPath, stem + ".jpg")
        img = cv2.imread(img_file)
        if img is None:
            # cv2.imread signals failure by returning None; turn it into a
            # readable message instead of an attribute error on `.shape`.
            raise FileNotFoundError(f"cannot read image {img_file}")
        Pheight, Pwidth, Pdepth = img.shape

        xmlBuilder = Document()
        annotation = xmlBuilder.createElement("annotation")
        xmlBuilder.appendChild(annotation)

        _append_text_element(xmlBuilder, annotation, "folder", folder_name)
        _append_text_element(xmlBuilder, annotation, "filename", stem + ".jpg")

        size = xmlBuilder.createElement("size")
        annotation.appendChild(size)
        _append_text_element(xmlBuilder, size, "width", str(Pwidth))
        _append_text_element(xmlBuilder, size, "height", str(Pheight))
        _append_text_element(xmlBuilder, size, "depth", str(Pdepth))

        for line in txt_lines:
            fields = line.split()
            if not fields:
                # Blank line (e.g. trailing newline): not an annotation.
                continue
            x_center = float(fields[1])
            y_center = float(fields[2])
            box_w = float(fields[3])
            box_h = float(fields[4])

            obj = xmlBuilder.createElement("object")
            annotation.appendChild(obj)
            _append_text_element(xmlBuilder, obj, "name", dic[fields[0]])
            _append_text_element(xmlBuilder, obj, "pose", "Unspecified")
            _append_text_element(xmlBuilder, obj, "truncated", "0")
            _append_text_element(xmlBuilder, obj, "difficult", "0")

            # Normalised centre/size -> pixel corners; the +1 offset of the
            # original formula is kept unchanged.
            xmin = int((x_center * Pwidth + 1) - box_w * 0.5 * Pwidth)
            ymin = int((y_center * Pheight + 1) - box_h * 0.5 * Pheight)
            xmax = int((x_center * Pwidth + 1) + box_w * 0.5 * Pwidth)
            ymax = int((y_center * Pheight + 1) + box_h * 0.5 * Pheight)

            bndbox = xmlBuilder.createElement("bndbox")
            obj.appendChild(bndbox)
            _append_text_element(xmlBuilder, bndbox, "xmin", str(xmin))
            _append_text_element(xmlBuilder, bndbox, "ymin", str(ymin))
            _append_text_element(xmlBuilder, bndbox, "xmax", str(xmax))
            _append_text_element(xmlBuilder, bndbox, "ymax", str(ymax))

        # The XML declaration says utf-8, so the file must be written as
        # utf-8 rather than in the platform default encoding.
        with open(os.path.join(xmlPath, stem + ".xml"), 'w', encoding='utf-8') as f:
            xmlBuilder.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='utf-8')
    except Exception as e:
        # Best effort: report which file failed and carry on with the rest.
        print(f"{file_name}: {e}")
def main(txtPath):
    """Convert every YOLO txt label file in ``txtPath`` to a VOC XML file.

    Args:
        txtPath: Folder that holds the txt label files.
    """
    pattern = os.path.join(txtPath, '*.[t][x][t]*')
    label_files = glob.glob(pattern)
    run(makexml, label_files)
if __name__ == "__main__":
    # Maps YOLO class ids to class names. It must match your own classes.txt,
    # in the same order.
    dic = {
        '0': "Dead tree",
        '1': "Sick tree",
    }
    folder_name = "JPEGImages"  # content of the <folder> tag, may be changed
    picPath = r"data/images/"  # image folder, the trailing / is required
    txtPath = r"data/labels/"  # txt label folder, the trailing / is required
    xmlPath = r"data/Annotations/"  # XML output folder, the trailing / is required

    # Both input folders are needed, so both must exist (the previous `or`
    # let the check pass when only one of them was present).
    assert (Path(picPath)).exists() and (Path(txtPath)).exists(), f"{picPath}或{txtPath}文件夹不存在"
    Path(xmlPath).mkdir(parents=True, exist_ok=True)
    main(txtPath)
您的数据集存放格式可以如下所示:
- mydata
  - test
    - images
    - labels
  - classes.txt
`classes.txt` 存放目标类别信息,注意顺序要对应。运行脚本后会在当前目录下生成 `instances_val2017.json` 文件。
'''
Date: 2023-10-18 10:41:52
LastEditors: xujiayue
LastEditTime: 2023-10-18 10:46:18
'''
import os
import cv2
import json
from tqdm import tqdm
# from sklearn.model_selection import train_test_split
import argparse
# Command-line options, parsed at module level into `arg`.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--root_dir',
    type=str,
    default=r'F:\ObjectDetection\Datasets\Experiment Datasets\archive721',
    help="root path of images and labels, include ./images and ./labels and classes.txt",
)
parser.add_argument(
    '--save_path',
    type=str,
    default='instances_val2017.json',
    help="if not split the dataset, give a path to a json file",
)
arg = parser.parse_args()
def yolo2coco(arg):
    """Convert YOLO txt labels into a single COCO-style JSON file.

    Reads ``<root_dir>/classes.txt``, ``<root_dir>/test/images`` and
    ``<root_dir>/test/labels`` and writes the result to ``arg.save_path``.

    Args:
        arg: Object with the attributes ``root_dir`` (dataset root) and
            ``save_path`` (output JSON path).
    """
    root_path = arg.root_dir
    print("Loading data from ", root_path)
    assert os.path.exists(root_path)
    originLabelsDir = os.path.join(root_path, 'test/labels')
    originImagesDir = os.path.join(root_path, 'test/images')
    with open(os.path.join(root_path, 'classes.txt')) as f:
        # Blank lines are kept so that line numbers stay equal to class ids.
        classes = [line.strip() for line in f]

    # Sorted so that annotation ids are assigned in a reproducible order.
    indexes = sorted(os.listdir(originImagesDir))

    dataset = {'categories': [], 'annotations': [], 'images': []}
    for i, cls in enumerate(classes, 0):
        dataset['categories'].append({'id': i, 'name': cls, 'supercategory': 'mark'})

    # Running annotation id
    ann_id_cnt = 0
    for index in tqdm(indexes):
        # Derive the label name from the real extension, so .jpeg (or any
        # other suffix) works and text inside the stem is never rewritten.
        stem = os.path.splitext(index)[0]
        label_path = os.path.join(originLabelsDir, stem + '.txt')
        if not os.path.exists(label_path):
            # No label file: the image is skipped entirely (it is not added
            # to 'images' either).
            continue

        # Read the image to get its width and height.
        im = cv2.imread(os.path.join(originImagesDir, index))
        if im is None:
            # cv2.imread returns None for files it cannot decode.
            print('Skip unreadable image {}'.format(index))
            continue
        img_h, img_w = im.shape[:2]

        image_id = int(stem) if stem.isnumeric() else stem
        dataset['images'].append({'file_name': index,
                                  'id': image_id,
                                  'width': img_w,
                                  'height': img_h})

        with open(label_path, 'r') as fr:
            for line in fr:
                label = line.split()
                if not label:
                    # Blank line: not an annotation.
                    continue
                # Class ids start at 0 and are used as-is.
                cls_id = int(label[0])
                x = float(label[1])
                y = float(label[2])
                w = float(label[3])
                h = float(label[4])
                # Convert normalised x, y, w, h to pixel x1, y1, x2, y2.
                x1 = (x - w / 2) * img_w
                y1 = (y - h / 2) * img_h
                x2 = (x + w / 2) * img_w
                y2 = (y + h / 2) * img_h
                box_w = max(0, x2 - x1)
                box_h = max(0, y2 - y1)
                dataset['annotations'].append({
                    'area': box_w * box_h,
                    'bbox': [x1, y1, box_w, box_h],
                    'category_id': cls_id,
                    'id': ann_id_cnt,
                    'image_id': image_id,
                    'iscrowd': 0,
                    # Mask: the four rectangle corners, clockwise from top-left.
                    'segmentation': [[x1, y1, x2, y1, x2, y2, x1, y2]]
                })
                ann_id_cnt += 1

    # Save the result
    with open(arg.save_path, 'w') as f:
        json.dump(dataset, f)
    print('Save annotation to {}'.format(arg.save_path))
if __name__ == "__main__":
    # Script entry point: run the conversion with the parsed command-line
    # options.
    yolo2coco(arg)