yolov7训练自定义数据集示例

参考以下文章

https://blog.csdn.net/athrunsunny/article/details/122132518

https://zhuanlan.zhihu.com/p/547878330

图片标注工具labelme安装

conda install pyqt=5

conda install labelme

# 备注labelme用法可自行查找相关文章

图片数据处理脚本

bmp转换jpg格式

bmp2jpg.py

# encoding=utf-8
import os
import cv2

# Directory holding the source images and directory receiving the .jpg copies.
bmp_dir = "D:\\sourcepng"
jpg_dir = "D:\\target"

# Make sure the destination exists before any image is written into it.
os.makedirs(jpg_dir, exist_ok=True)

filelists = os.listdir(bmp_dir)

converted = 0
for file in filelists:
    # Flag -1 loads the image unchanged; 0 would load it as grayscale.
    img = cv2.imread(os.path.join(bmp_dir, file), -1)
    if img is None:
        # imread yields None for entries it cannot decode (sub-directories,
        # non-image files, corrupt images); passing None on to imwrite fails.
        print('跳过无法读取的文件:%s' % file)
        continue

    # Swap only the real extension, whatever it is (.png, .PNG, .bmp, ...),
    # instead of replacing every literal '.png' inside the file name.
    newName = os.path.splitext(file)[0] + '.jpg'
    if not cv2.imwrite(os.path.join(jpg_dir, newName), img):
        print('写入失败:%s' % newName)
        continue

    converted += 1
    print('第%d张图:%s' % (converted, newName))

labelme标注数据转换训练集数据脚本

label2yolo.py

import os
import numpy as np
import json
from glob import glob
import cv2
from sklearn.model_selection import train_test_split
from shutil import copyfile
import argparse



# Labelme坐标到YOLO V5坐标的转换
def convert(size, box):
    """Convert a pixel-space box into normalised YOLO ``(x, y, w, h)`` form.

    Args:
        size: ``(image_width, image_height)`` in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels. Note the order: both x
            bounds come first, then both y bounds.

    Returns:
        Tuple ``(x_center, y_center, width, height)`` where the x values are
        scaled by ``1 / image_width`` and the y values by ``1 / image_height``.
    """
    dw = 1. / (size[0])
    dh = 1. / (size[1])

    # The centre is the plain midpoint of the two bounds. The previous
    # "- 1" offset (presumably inherited from converters written for 1-based
    # annotations - TODO confirm) moved every box one pixel toward the origin
    # and produced negative centres for boxes touching the top/left edge.
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]

    return (x * dw, y * dh, w * dw, h * dh)

# 样本转换
def _clamp_pixel(value, upper):
    """Return ``value`` as a float limited to the closed range ``[0, upper]``."""
    return float(min(max(value, 0), upper))


def convertToYolo5(fileList, output_dir, labelme_path, datatype):
    """Copy Labelme samples into a YOLO ``images/`` + ``labels/`` layout.

    For every stem in ``fileList`` the image ``<labelme_path>/<stem>.jpg`` is
    copied to ``<output_dir>/images/<datatype>/`` and the rectangle shapes of
    ``<labelme_path>/<stem>.json`` are written, one per line, to
    ``<output_dir>/labels/<datatype>/<stem>.txt`` as
    ``class_id x_center y_center width height``.

    Args:
        fileList: Sample stems (file names without directory or extension).
        output_dir: Root directory of the YOLO dataset.
        labelme_path: Directory holding the Labelme ``.jpg``/``.json`` pairs.
        datatype: Sub-directory name of the split, e.g. ``"train"``.

    Side effects:
        Labels not yet present in the module-level ``obj_classes`` list are
        appended to it; a label's class id is its index in that list.
    """
    # makedirs also creates the missing parents, i.e. output_dir itself.
    yolo_images_dir = '{0}/images/{1}/'.format(output_dir, datatype)
    yolo_labels_dir = '{0}/labels/{1}/'.format(output_dir, datatype)
    os.makedirs(yolo_images_dir, exist_ok=True)
    os.makedirs(yolo_labels_dir, exist_ok=True)

    for json_file_ in fileList:
        # 1. Copy the sample image.
        imagePath = labelme_path + '/' + json_file_ + ".jpg"
        yolo_image_file_path = yolo_images_dir + json_file_ + ".jpg"
        copyfile(imagePath, yolo_image_file_path)

        # 2. Load the Labelme annotation; the handle is closed right away.
        json_filename = labelme_path + '/' + json_file_ + ".json"
        with open(json_filename, "r", encoding="utf-8") as json_fp:
            json_obj = json.load(json_fp)

        height = json_obj['imageHeight']
        width = json_obj['imageWidth']

        # 3. Write the YOLO label file; "with" closes it even on errors.
        yolo_label_file_path = yolo_labels_dir + json_file_ + ".txt"
        with open(yolo_label_file_path, 'w') as yolo_label_file:
            for shape in json_obj["shapes"]:
                # Register the class even when the shape is skipped below.
                label = shape["label"]
                if label not in obj_classes:
                    obj_classes.append(label)

                # Only rectangles are converted; a shape without a
                # "shape_type" entry is skipped instead of raising KeyError.
                if shape.get("shape_type") != 'rectangle':
                    continue

                points = np.array(shape["points"])
                # Clamp to the image on both sides so the normalised values
                # cannot leave the 0..1 range.
                xmin = _clamp_pixel(points[:, 0].min(), width)
                xmax = _clamp_pixel(points[:, 0].max(), width)
                ymin = _clamp_pixel(points[:, 1].min(), height)
                ymax = _clamp_pixel(points[:, 1].max(), height)

                # Drop degenerate boxes (zero or negative extent).
                if xmax <= xmin or ymax <= ymin:
                    continue

                bbox_yolo_normalized = convert((width, height), (xmin, xmax, ymin, ymax))
                class_id = obj_classes.index(label)
                yolo_label_file.write(
                    str(class_id) + " " + " ".join([str(a) for a in bbox_yolo_normalized]) + '\n'
                )
    
def check_output_directory(output = ""):
    """Create the output directory, refusing to reuse an existing one.

    Args:
        output: Directory to create; a trailing ``/`` is appended to it.

    Returns:
        The created path (``output`` plus ``/``), or ``""`` when that path
        already exists.
    """
    target = output + '/'

    if not os.path.exists(target):
        os.makedirs(target)
        return target

    # Existing content is never removed automatically, to avoid deleting
    # files by mistake; the user has to clear the directory by hand.
    print('Warning: path of %s already exist, please remove it firstly by manual' % target)
    return ""


def create_yolo_dataset_cfg(output_dir='', label_class=None):
    """Write the dataset description file ``<output_dir>/data.yaml``.

    The file lists the train/val/test image locations, the class count
    (``nc``) and the class names (``names``) as an inline list that is
    wrapped onto a new line after every ten entries.

    Args:
        output_dir: Directory in which ``data.yaml`` is created.
        label_class: Class names in class-id order. ``None`` or an empty
            sequence produces ``nc: 0`` and ``names: []``.
    """
    labels = [] if label_class is None else list(label_class)

    name_parts = []
    for index, label in enumerate(labels):
        if index:
            name_parts.append(", ")
            # Start a new, indented line after every ten names.
            if index % 10 == 0:
                name_parts.append("\n        ")
        name_parts.append("'" + label + "'")

    # NOTE(review): the split paths are written exactly as before; confirm
    # that they match the directory layout the training run expects.
    lines = [
        'train:  ../train/images\n',
        'val:    ../valid/images\n',
        'test:   ../test/images\n',
        '\n',
        '# Classes\n',
        'nc: %s\n' % len(labels),
        # The opening bracket is always written, so an empty class list
        # yields "names: []" rather than the invalid "names: ]".
        'names: [' + ''.join(name_parts) + ']  # class names',
    ]

    with open(output_dir + '/data.yaml', 'w') as data_cfg_file:
        data_cfg_file.writelines(lines)

def labelme2yolo(input = '', output = ''):
    """Convert a directory of Labelme samples into a YOLO dataset.

    The ``*.json`` files found in ``input`` are split with a fixed seed into
    train (70 %), and the remaining 30 % again 70/30 into valid and test.
    Each split is converted with ``convertToYolo5`` and a ``data.yaml`` is
    written into ``output``.

    Args:
        input: Directory holding the Labelme ``.jpg``/``.json`` pairs.
        output: Directory to create for the dataset; it must not exist yet.

    Returns:
        ``0`` on success, ``-1`` when ``input`` holds no json file or the
        output directory could not be prepared.

    Raises:
        ValueError: Propagated from ``train_test_split`` when there are too
            few samples to fill every split.
    """
    labelme_path = input

    # 1. Collect the sample stems. The list is sorted because glob gives no
    #    ordering guarantee; a stable order makes the seeded split below
    #    reproducible across machines.
    json_paths = sorted(glob(labelme_path + "/*.json"))
    files = [os.path.splitext(os.path.basename(path))[0] for path in json_paths]

    if not files:
        print("No Labelme json files found in %s, Do Nothing!" % labelme_path)
        return -1

    # 2. Split before touching the file system: if the split fails, no empty
    #    output directory is left behind to block the next run.
    train_files, valid_test_files = train_test_split(files, test_size=0.3, random_state=55)
    valid_files, test_files = train_test_split(valid_test_files, test_size=0.3, random_state=55)

    # 3. Prepare the output directory.
    outputdir_root = check_output_directory(output)
    if outputdir_root == "":
        print("No valid output directory, Do Nothing!")
        return -1

    # 4. Generate images + labels for every split.
    convertToYolo5(train_files, outputdir_root, labelme_path, "train")
    convertToYolo5(valid_files, outputdir_root, labelme_path, "valid")
    convertToYolo5(test_files, outputdir_root, labelme_path, "test")

    # 5. Write the dataset configuration file.
    create_yolo_dataset_cfg(output, obj_classes)

    print("Classes:", obj_classes)
    print('Finished, output path =', outputdir_root)

    return 0
    
# def parse_opt():
#     # define argparse object
#     parser = argparse.ArgumentParser()
    
#     # add argument for command line
#     parser.add_argument('--input',      type=str, help='The input Labelme directory')
#     parser.add_argument('--output',     type=str, help='The output YOLO V5 directory')
    
#     # parse arges from command line
#     opt = parser.parse_args()
#     print("input  =", opt.input)
#     print("output =", opt.output)
    
#     # return opt
#     return opt

def main(sourcedir,targetdir):
    """Run the Labelme-to-YOLO conversion from ``sourcedir`` into ``targetdir``."""
    labelme2yolo(input=sourcedir, output=targetdir)

# Class names in class-id order. Kept at module level because the conversion
# functions read (and extend) this list as a global; defining it only inside
# the __main__ guard made them fail with NameError when the module was
# imported instead of executed.
obj_classes = ['maotu', 'miluxiaoyao', 'linghou']

if __name__ == '__main__':
    source_dir = "D:\\target"  # Labelme data: jpg images plus their json files
    target_dir = "D:\\yolodata"
    main(source_dir, target_dir)

训练图片路径集整合脚本

txtdemo.py

import glob

# Dataset root; the generated txt listings are written here.
txt_path = "D:\\pythondemo\\yolov7\\datasets\\Helmet\\"
# Directories holding the training and validation images.
train_image_path = txt_path + "images\\train\\"
valid_image_path = txt_path + "images\\valid\\"

def generate_train_and_val(image_path, txt_file):
    """Write the path of every ``*.jpg`` under ``image_path`` to ``txt_file``.

    Args:
        image_path: Directory prefix; ``'*.jpg'`` is appended to it directly,
            so it has to end with a path separator.
        txt_file: Listing file to (over)write, one image path per line.
    """
    pattern = image_path + '*.jpg'
    with open(txt_file, 'w') as listing:
        listing.writelines(found + '\n' for found in glob.glob(pattern))

# Write one listing per split into the dataset root.
for _split_dir, _listing_name in (
    (train_image_path, 'train.txt'),
    (valid_image_path, 'valid.txt'),
):
    generate_train_and_val(_split_dir, txt_path + _listing_name)


相关数据处理

由于训练用的是yolov7_training.pt,模型配置文件基于cfg/training/yolov7.yaml拷贝,
重命名为yolov7-Helmet.yaml,该文件只需修改开头处的类别数(nc)即可

#数据文件相关定义
这里基于data/coco.yaml拷贝文件,重命名为Helmet.yaml 

#相关修改数据如下
train: ./datasets/Helmet/train.txt  # 这里train.txt中为训练图片的绝对路径
val: ./datasets/Helmet/valid.txt    # 同上
# test: ./datasets/Helmet/test/images/ 

# number of classes
nc: 3

# class names
names:  ['maotu', 'miluxiaoyao','linghou']



# 3 数据集存放
datasets\Helmet\images\train # 存放所有的训练图片
datasets\Helmet\images\valid # 存放所有的校验图片
datasets\Helmet\images\test # 存放所有的测试图片

datasets\Helmet\labels\train # 存放所有的训练图片的标注标签数据
datasets\Helmet\labels\valid # 存放所有的验证图片的标注标签数据


训练测试


## 备注:建议将每类图片分开存放处理,先分别划分好每类图片的训练集/验证集/测试集,再合并后进行训练
# 这里是近71张图片,3种类型,训练集49张,验证集15张,测试集7张,训练200次结果,使用cpu,耗时5小时
     Epoch   gpu_mem       box       obj       cls     total    labels  img_size
   199/199        0G    0.0245  0.006483  0.001596   0.03258         2       640: 100%|██| 7/7 [01:20<00:00, 11.46s/it]
               Class      Images      Labels           P           R      mAP@.5  mAP@.5:.95: 100%|| 1/1 [00:07<00:00,
                 all          15          44       0.777        0.71       0.672       0.299
               maotu          15          22       0.898       0.727       0.803       0.358
         miluxiaoyao          15          11       0.894       0.767       0.838       0.399
             linghou          15          11       0.538       0.636       0.376        0.14
             
             
# 进行图片识别验证
python detect.py --weights runs/train/exp/weights/best.pt --source datasets/Helmet/images/test

遇到问题

这里个人训练用的是cpu

问题1 运行报错

# 报错
OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized.OMP: Hint This means that multiple copies of the OpenMP runtime have been linked into the program. That is dangerous…

# 参考文章 https://www.iotword.com/4385.html
# 处理方案如下
找到你anaconda的安装地址然后找到envs,找到你所用的环境,然后搜索libiomp5md.dll,关键问题在于anaconda的环境下存在两个libiomp5md.dll文件

仔细观察,一个在Lib\site-packages\torch\lib下,另一个在Library\bin下,第一个是torch下的,留下这个,第二个是conda的,新建文件夹将它剪切到这个文件夹,暂存一下。(暂存路径:D:\conda_libiomp5md\libiomp5md.dll)
最后确认一下,只有一个libiomp5md.dll即可。


问题2 cpu训练报错cuda的异常

# 报错
  File "D:\myselfcode\pythondemo\yolov7\utils\loss.py", line 786, in build_targets
    matching_bs[i] = torch.tensor([], device='cuda:0', dtype=torch.int64)
# 根据报错代码定位,异常代码在utils/loss.py中

定位到报错的代码行,将 device='cuda:0' 修改为 device='cuda:0' if torch.cuda.is_available() else 'cpu'

你可能感兴趣的:(YOLO,python,pytorch)