Source code reference: ultralytics/yolov3
├── data: caches of dataset-related information used during training
├── runs: all TensorBoard-related files generated during training
├── utils: utilities used when building the training pipeline
├── models: YOLOv3 network configuration files and the concrete network implementation
path: coco128  # dataset root directory
train: images/train2017  # training-set directory (relative to path)
val: images/train2017  # validation-set directory (relative to path)
test:  # test-set directory (optional)
# Classes
nc: 80  # total number of classes
names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
        'hair drier', 'toothbrush']  # class names
import os
import yaml

# Generate the image-path txt files from the train and val entries of the dataset yaml
def generate_txt(path):
    # read the yaml config (the context manager closes the file handle)
    with open(path, encoding='utf8') as f:
        y = yaml.safe_load(f)
    root_path = os.path.join("../", y['path'])
    train_path = os.path.join("../", y['path'], y['train'])
    val_path = os.path.join("../", y['path'], y['val'])
    # write one image path per line for the training and validation sets
    with open(root_path + '/train.txt', 'w') as train_list, \
            open(root_path + '/val.txt', 'w') as val_list:
        for file in os.listdir(train_path):
            train_list.write(train_path + '/' + file + '\n')
        for file in os.listdir(val_path):
            val_list.write(val_path + '/' + file + '\n')

if __name__ == '__main__':
    yaml_path = '../data/coco128.yaml'  # path to the dataset yaml config file
    generate_txt(yaml_path)
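After running the script, train.txt and val.txt each hold one image path per line, which is exactly the format the dataset class below consumes. For coco128, the first lines of train.txt would look like this (illustrative paths):

../coco128/images/train2017/000000000009.jpg
../coco128/images/train2017/000000000025.jpg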
def __init__(self,
             path,                # path to train.txt or val.txt
             img_size=416,
             batch_size=16,
             augment=False,       # True for the training set (enables augment_hsv), False for the validation set
             hyp=None,            # hyperparameter dict, including the parameters used for image augmentation
             rect=False,          # whether to use rectangular training
             cache_images=False,  # whether to cache images in memory
             single_cls=False, pad=0.0, rank=-1):
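    # Typical hyp keys consumed later in this file (taken from __getitem__ below):
    # "hsv_h", "hsv_s", "hsv_v" for augment_hsv, and
    # "degrees", "translate", "scale", "shear" for random_affine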
    try:
        path = str(Path(path))
        # parent = str(Path(path).parent) + os.sep
        if os.path.isfile(path):  # file
            # read the image path stored on each line
            with open(path, "r") as f:
                f = f.read().splitlines()
        else:
            raise Exception("%s does not exist" % path)
        # check each image's extension against the supported list and keep only supported image paths
        # img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng']
        self.img_files = [x for x in f if os.path.splitext(x)[-1].lower() in img_formats]
    except Exception as e:
        raise FileNotFoundError("Error loading data from {}. {}".format(path, e))

    # raise an error if the image list is empty
    n = len(self.img_files)
    assert n > 0, "No images found in %s." % path
    # batch index
    # assign each image to a batch
    bi = np.floor(np.arange(n) / batch_size).astype(np.int_)
    # total number of batches after splitting the dataset
    nb = bi[-1] + 1  # number of batches
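    # e.g. with n = 10 images and batch_size = 4:
    # bi = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2] and nb = 3 (the last batch holds the remainder)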
    self.n = n                # total number of images
    self.batch = bi           # batch index of each image
    self.img_size = img_size  # output image size after preprocessing
    self.augment = augment    # whether to apply augment_hsv
    self.hyp = hyp            # hyperparameter dict, including the parameters used for image augmentation
    self.rect = rect          # whether to use rectangular training
    # note: when rect is enabled, mosaic is disabled by default
    self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
    # Define labels
    # derive the label path for each image
    # (./my_yolo_dataset/train/images/2009_004012.jpg) -> (./my_yolo_dataset/train/labels/2009_004012.txt)
    self.label_files = [x.replace("images", "labels").replace(os.path.splitext(x)[-1], ".txt")
                        for x in self.img_files]

    # Read image shapes (wh)
    # check whether a .shapes cache for this dataset exists under data; it stores each image's width and height
    sp = path.replace(".txt", ".shapes")  # shapefile path
    try:
        with open(sp, "r") as f:  # read existing shapefile
            s = [x.split() for x in f.read().splitlines()]
            # check that the number of rows (images) in the existing shape file matches
            # the current dataset; if not, treat it as a different dataset and
            # regenerate the shape file
            assert len(s) == n, "shapefile out of sync"
    except Exception as e:
        # print("read {} failed [{}], rebuild {}.".format(sp, e, sp))
        # tqdm displays the processing progress
        # read the size of every image
        if rank in [-1, 0]:
            image_files = tqdm(self.img_files, desc="Reading image shapes")
        else:
            image_files = self.img_files
        s = [exif_size(Image.open(f)) for f in image_files]
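        # exif_size returns the (width, height) of the image corrected for the EXIF
        # orientation tag, so rotated JPEGs report their displayed dimensions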
        # save every image's shape in the .shapes file
        np.savetxt(sp, s, fmt="%g")  # overwrite existing (if any)

    # record the original size of every image
    self.shapes = np.array(s, dtype=np.float64)
    # Rectangular Training https://github.com/ultralytics/yolov3/issues/232
    # if true, training uses rectangles close to the original aspect ratio
    # (the longest side becomes img_size) instead of img_size x img_size
    # note: when rect is enabled, mosaic is disabled by default
    if self.rect:
        # Sort by aspect ratio
        s = self.shapes  # wh
        # compute each image's height/width ratio
        ar = s[:, 1] / s[:, 0]  # aspect ratio
        # argsort returns the indices that would sort the array in ascending order
        # sorting by aspect ratio means each batch later contains images with similar aspect ratios
        irect = ar.argsort()
        # reorder the images, labels and shapes according to the sorted order
        self.img_files = [self.img_files[i] for i in irect]
        self.label_files = [self.label_files[i] for i in irect]
        self.shapes = s[irect]  # wh
        ar = ar[irect]

        # set training image shapes
        # compute the common shape used by each batch
        shapes = [[1, 1]] * nb  # nb: number of batches
        for i in range(nb):
            ari = ar[bi == i]  # bi: batch index
            # min and max aspect ratio within the i-th batch
            mini, maxi = ari.min(), ari.max()
            # if h/w < 1 (w > h), set w to img_size
            if maxi < 1:
                shapes[i] = [maxi, 1]
            # if h/w > 1 (w < h), set h to img_size
            elif mini > 1:
                shapes[i] = [1, 1 / mini]
        # compute the input shape for each batch (rounded up to a multiple of 32)
        self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32. + pad).astype(np.int_) * 32
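        # e.g. with img_size = 416, pad = 0.0 and a batch whose largest h/w is 0.75:
        # shapes[i] = [0.75, 1] -> ceil([312, 416] / 32) * 32 = [320, 416] (height 320, width 416)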
    # cache labels
    self.imgs = [None] * n  # n is the total number of images
    # label: [class, x, y, w, h], where x, y, w, h are all relative values
    self.labels = [np.zeros((0, 5), dtype=np.float32)] * n
    labels_loaded = False
    nm, nf, ne, nd = 0, 0, 0, 0  # number missing, found, empty, duplicate
    # separate cache names avoid mixing the rect=False and rect=True caches,
    # which would corrupt the computed mAP
    # (when rect is True, self.img_files and self.labels are re-sorted)
    if rect is True:
        np_labels_path = str(Path(self.label_files[0]).parent) + ".rect.npy"  # saved labels in *.npy file
    else:
        np_labels_path = str(Path(self.label_files[0]).parent) + ".norect.npy"
    if os.path.isfile(np_labels_path):
        x = np.load(np_labels_path, allow_pickle=True)
        if len(x) == n:
            # if the number of cached labels equals the current number of images,
            # assume it is the same dataset and read directly from the cache
            self.labels = x
            labels_loaded = True

    # show the progress bar only in the first process
    if rank in [-1, 0]:
        pbar = tqdm(self.label_files)
    else:
        pbar = self.label_files
    # iterate over the label files
    for i, file in enumerate(pbar):
        if labels_loaded is True:
            # read directly from the cache if it exists
            l = self.labels[i]
        else:
            # read the label information from file
            try:
                with open(file, "r") as f:
                    # read every label line and split it on whitespace
                    l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
            except Exception as e:
                print("An error occurred while loading the file {}: {}".format(file, e))
                nm += 1  # file missing
                continue

        # if the annotation is not empty
        if l.shape[0]:
            # every label row must have exactly five values: [class, x, y, w, h]
            assert l.shape[1] == 5, "> 5 label columns: %s" % file
            assert (l >= 0).all(), "negative labels: %s" % file
            assert (l[:, 1:] <= 1).all(), "non-normalized or out of bounds coordinate labels: %s" % file
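            # an illustrative valid label line (class x_center y_center width height, all normalized):
            # "45 0.479492 0.688771 0.955609 0.595500"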
            # check each row for duplicates
            if np.unique(l, axis=0).shape[0] < l.shape[0]:  # duplicate rows
                nd += 1
            if single_cls:
                l[:, 0] = 0  # force dataset into single-class mode
            self.labels[i] = l
            nf += 1  # file found
        else:
            ne += 1  # file empty

        # show the progress bar only in the first process
        if rank in [-1, 0]:
            # update the progress-bar description
            pbar.desc = "Caching labels (%g found, %g missing, %g empty, %g duplicate, for %g images)" % (
                nf, nm, ne, nd, n)
    assert nf > 0, "No labels found in %s." % (os.path.dirname(self.label_files[0]) + os.sep)

    # if the labels have not yet been cached in numpy format and there are more
    # than 1000 training samples, save them as a numpy file
    if not labels_loaded and n > 1000:
        print("Saving labels to %s for faster future loading" % np_labels_path)
        np.save(np_labels_path, self.labels)  # save for next time
def __getitem__(self, index):
    # fetch the hyperparameters
    hyp = self.hyp
    # enabled for the training set
    if self.mosaic:
        # load mosaic
        img, labels = load_mosaic(self, index)
        shapes = None
    else:
        # load image
        img, (h0, w0), (h, w) = load_image(self, index)

        # letterbox
        shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
        img, ratio, pad = letterbox(img, shape, auto=False, scale_up=self.augment)
        shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling
        # load labels
        labels = []
        x = self.labels[index]
        if x.size > 0:
            # Normalized xywh to pixel xyxy format
            labels = x.copy()  # label: class, x, y, w, h
            labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
            labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
            labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
            labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]
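            # worked example (illustrative numbers): for a label (x=0.5, y=0.5, w=0.4, h=0.2)
            # with ratio=(1, 1), resized w=416, h=312 and pad=(0, 52):
            # xmin = 416 * (0.5 - 0.2) + 0 = 124.8, ymin = 312 * (0.5 - 0.1) + 52 = 176.8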
    if self.augment:
        # Augment imagespace
        if not self.mosaic:
            img, labels = random_affine(img, labels,
                                        degrees=hyp["degrees"],
                                        translate=hyp["translate"],
                                        scale=hyp["scale"],
                                        shear=hyp["shear"])
        # Augment colorspace
        augment_hsv(img, h_gain=hyp["hsv_h"], s_gain=hyp["hsv_s"], v_gain=hyp["hsv_v"])

    nL = len(labels)  # number of labels
    if nL:
        # convert xyxy to xywh
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
        # normalize the coordinates to 0-1
        labels[:, [2, 4]] /= img.shape[0]  # height
        labels[:, [1, 3]] /= img.shape[1]  # width
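        # for reference, xyxy2xywh (a utility in the repo) maps [xmin, ymin, xmax, ymax]
        # to [x_center, y_center, w, h]:
        #   x_center = (xmin + xmax) / 2, y_center = (ymin + ymax) / 2
        #   w = xmax - xmin, h = ymax - ymin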
    if self.augment:
        # random left-right flip
        lr_flip = True  # random horizontal flip
        if lr_flip and random.random() < 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]  # 1 - x_center
        # random up-down flip
        ud_flip = False
        if ud_flip and random.random() < 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]  # 1 - y_center

    labels_out = torch.zeros((nL, 6))  # nL: number of labels; column 0 is reserved for the image index within the batch
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # Convert BGR to RGB, and HWC to CHW (e.g. 3x512x512)
    img = img[:, :, ::-1].transpose(2, 0, 1)
    # the flip and transpose above yield a view with negative strides, which torch.from_numpy cannot handle
    img = np.ascontiguousarray(img)
    return torch.from_numpy(img), labels_out, self.img_files[index], shapes, index
def __len__(self):
return len(self.img_files)
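Because __getitem__ returns (img, labels_out, path, shapes, index) and each image carries a variable number of labels, the default DataLoader collation cannot batch these samples. Below is a minimal sketch of a companion collate_fn following the pattern used in the ultralytics repo; treat it as an assumption about this class rather than its verbatim code. It stacks the images, concatenates all labels into one tensor, and writes each sample's position in the batch into the reserved column 0 of labels_out.

@staticmethod
def collate_fn(batch):
    # batch is a list of tuples, one per sample, exactly as returned by __getitem__
    img, label, path, shapes, index = zip(*batch)
    for i, l in enumerate(label):
        l[:, 0] = i  # record which image in the batch each label row belongs to
    return torch.stack(img, 0), torch.cat(label, 0), path, shapes, index

A DataLoader would then be constructed with collate_fn=dataset.collate_fn, so each training batch carries a single (num_labels_in_batch, 6) label tensor.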