目前,深度学习已经在计算机视觉领域取得了显著的成果,特别是目标检测领域。YOLO(You Only Look Once)是一种基于深度学习的实时目标检测算法,其通过将目标检测问题转化为回归问题,将目标的位置和类别同时预测出来。YOLO算法具有快速、准确的特点,因此在目标检测领域受到了广泛关注。
(2)打开labelImg并选择“Open Dir”来选择你的图片目录。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET
import os
classes = []  # Starts empty; class names are appended as they are first seen in the annotations
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))  # Directory that contains this script
def convert(size, box):
    """Convert a pixel-space box into normalized centre/size form.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels -- note that both x values come first.

    Returns:
        ``(x_center, y_center, width, height)``, where the x/width values are divided by the
        image width and the y/height values by the image height.
    """
    dw = 1. / size[0]
    dh = 1. / size[1]
    x_center = (box[0] + box[1]) / 2.0
    y_center = (box[2] + box[3]) / 2.0
    box_w = box[1] - box[0]
    box_h = box[3] - box[2]
    return (x_center * dw, y_center * dh, box_w * dw, box_h * dh)
def convert_annotation(image_id):
    """Convert one XML annotation file into a YOLO-format label text file.

    Reads ``./label_xml/<image_id>.xml`` and writes ``./label_txt/<image_id>.txt`` (both
    relative to the current working directory). Each ``<object>`` element becomes one line:
    ``<class_id> <x_center> <y_center> <width> <height>``, with the box normalized by ``convert``.

    Side effect: class names not seen before are appended to the module-level ``classes``
    list; the written class id is the name's index in that list.

    Args:
        image_id: Annotation file name without its extension.
    """
    # os.path.join instead of a literal backslash so the paths also resolve outside Windows.
    xml_file = os.path.join('./label_xml', '%s.xml' % image_id)
    txt_file = os.path.join('./label_txt', '%s.txt' % image_id)

    with open(xml_file, encoding='UTF-8') as in_file:
        root = ET.parse(in_file).getroot()

    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    os.makedirs(os.path.dirname(txt_file), exist_ok=True)
    with open(txt_file, 'w') as out_file:  # write the txt-format label file
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls not in classes:
                classes.append(cls)  # register a class the first time it appears
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects (xmin, xmax, ymin, ymax).
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
xml_path = os.path.join(CURRENT_DIR, './label_xml/')
# Convert every XML annotation found in the label_xml folder next to this script.
# NOTE(review): convert_annotation() resolves './label_xml' and './label_txt' against the
# working directory, while this listing uses the script directory -- run the script from its
# own directory, or unify the two.
img_xmls = os.listdir(xml_path)
for img_xml in img_xmls:
    # splitext keeps dots that are part of the base name ("a.b.xml" -> "a.b").
    label_name, extension = os.path.splitext(img_xml)
    if extension.lower() != '.xml':
        continue  # ignore stray non-annotation files
    convert_annotation(label_name)
print("Classes:")  # print the final classes list
print(classes)
class YOLOv5Detector:
    """Holds YOLOv5 detection options and exposes ``run()`` to set up a detection session.

    Every constructor argument is stored unchanged on the instance under the same name.
    """

    def __init__(self, weights='yolov5s.pt', source='data/images', imgsz=640, conf_thres=0.25, iou_thres=0.45,
                 max_det=1000, device='', view_img=False, save_txt=False, save_conf=False, save_crop=False,
                 nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False,
                 project='runs/detect', name='exp', exist_ok=False, line_thickness=3, hide_labels=False,
                 hide_conf=False, half=False):
        self.weights = weights
        self.source = source
        self.imgsz = imgsz
        self.conf_thres = conf_thres
        self.iou_thres = iou_thres
        self.max_det = max_det
        self.device = device
        self.view_img = view_img
        self.save_txt = save_txt
        self.save_conf = save_conf
        self.save_crop = save_crop
        self.nosave = nosave
        self.classes = classes
        self.agnostic_nms = agnostic_nms
        self.augment = augment
        self.visualize = visualize
        self.update = update
        self.project = project
        self.name = name
        self.exist_ok = exist_ok
        self.line_thickness = line_thickness
        self.hide_labels = hide_labels
        self.hide_conf = hide_conf
        self.half = half

    def run(self):
        """Create the output directory, pick the device and load the model.

        NOTE(review): the visible excerpt stops at the ``if webcam:`` dataloader branch, so the
        dataloader and inference loop are not part of this method yet.
        """
        save_img = not self.nosave and not self.source.endswith('.txt')  # save inference images
        webcam = self.source.isnumeric() or self.source.endswith('.txt') or self.source.lower().startswith(
            ('rtsp://', 'rtmp://', 'http://', 'https://'))

        # Directories
        save_dir = increment_path(Path(self.project) / self.name, exist_ok=self.exist_ok)  # increment run
        (save_dir / 'labels' if self.save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

        # Initialize
        device = select_device(self.device)
        # `and`, not `&`: `a & b != c` parses as `(a & b) != c`, which would AND a bool with a str.
        half = self.half and device.type != 'cpu'  # half precision only supported on CUDA

        # Load model
        model = attempt_load(self.weights, map_location=device)  # load FP32 model
        stride = int(model.stride.max())  # model stride
        imgsz = check_img_size(self.imgsz, s=stride)  # check image size
        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
        if half:
            model.half()  # to FP16

        # Second-stage classifier
        classify = False
        if classify:
            modelc = load_classifier(name='resnet50', n=2)  # initialize
            # load_state_dict() does not return the module, so .to()/.eval() cannot be chained on it.
            modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model'])
            modelc.to(device).eval()

        # Dataloader
        if webcam:
            pass  # NOTE(review): branch body missing from the excerpt -- restore before use
class ModelExporter:
    """Export a YOLOv5 checkpoint to TorchScript, ONNX and/or CoreML.

    Constructor arguments are stored unchanged as attributes of the same name. ``run()`` loads
    the model and calls the ``export_*`` method for every format named in ``include``.
    Each ``export_*`` method prints a failure message instead of raising.
    """

    def __init__(self, weights='./yolov5s.pt', img_size=(640, 640), batch_size=1, device='cpu',
                 include=('torchscript', 'onnx', 'coreml'), half=False, inplace=False, train=False,
                 optimize=False, dynamic=False, simplify=False, opset_version=12):
        self.weights = weights
        self.img_size = img_size
        self.batch_size = batch_size
        self.device = device
        self.include = include
        self.half = half
        self.inplace = inplace
        self.train = train
        self.optimize = optimize
        self.dynamic = dynamic
        self.simplify = simplify
        self.opset_version = opset_version

    @staticmethod
    def _expand_img_size(img_size):
        """Repeat a one-element size so it has two entries; return longer sizes unchanged."""
        # Parenthesized on purpose: `x * 2 if c else 1` would yield the int 1 for 2-element sizes.
        return img_size * (2 if len(img_size) == 1 else 1)

    def export_torchscript(self, model, img, file, optimize):
        """Trace ``model`` with ``img`` and save it as ``file`` with suffix ``.torchscript.pt``.

        Returns:
            The traced module, or ``None`` when any step raised.
        """
        # TorchScript model export
        prefix = colorstr('TorchScript:')
        try:
            print(f'\n{prefix} starting export with torch {torch.__version__}...')
            f = file.with_suffix('.torchscript.pt')
            ts = torch.jit.trace(model, img, strict=False)
            (optimize_for_mobile(ts) if optimize else ts).save(f)
            print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
            return ts
        except Exception as e:
            print(f'{prefix} export failure: {e}')

    def export_onnx(self, model, img, file, opset_version, train, dynamic, simplify):
        """Export ``model`` to ``file`` with suffix ``.onnx``, check it, and optionally simplify it.

        A simplifier failure is reported separately and does not fail the export.
        """
        # ONNX model export
        prefix = colorstr('ONNX:')
        try:
            check_requirements(('onnx', 'onnx-simplifier'))
            import onnx

            print(f'\n{prefix} starting export with onnx {onnx.__version__}...')
            f = file.with_suffix('.onnx')
            torch.onnx.export(model, img, f, verbose=False, opset_version=opset_version,
                              training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL,
                              do_constant_folding=not train,
                              # dynamic_axes and the simplifier below address tensors by these names.
                              input_names=['images'],
                              output_names=['output'],
                              dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'},  # shape(1,3,640,640)
                                            'output': {0: 'batch', 1: 'anchors'}  # shape(1,25200,85)
                                            } if dynamic else None)

            # Checks
            model_onnx = onnx.load(f)  # load onnx model
            onnx.checker.check_model(model_onnx)  # check onnx model
            # print(onnx.helper.printable_graph(model_onnx.graph))  # print

            # Simplify
            if simplify:
                try:
                    import onnxsim

                    print(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
                    # NOTE(review): the model argument was missing from the excerpt; check the remaining
                    # keyword arguments against the installed onnx-simplifier version.
                    model_onnx, check = onnxsim.simplify(
                        model_onnx,
                        input_shapes={'images': list(img.shape)} if dynamic else None)
                    assert check, 'assert check failed'
                    onnx.save(model_onnx, f)
                except Exception as e:
                    print(f'{prefix} simplifier failure: {e}')
            print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
        except Exception as e:
            print(f'{prefix} export failure: {e}')

    def export_coreml(self, model, img, file):
        """Convert ``model`` to CoreML and save it as ``file`` with suffix ``.mlmodel``."""
        # CoreML model export
        prefix = colorstr('CoreML:')
        try:
            import coremltools as ct

            print(f'\n{prefix} starting export with coremltools {ct.__version__}...')
            f = file.with_suffix('.mlmodel')
            model.train()  # CoreML exports should be placed in model.train() mode
            ts = torch.jit.trace(model, img, strict=False)  # TorchScript model
            model = ct.convert(ts, inputs=[ct.ImageType('image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
            model.save(f)  # without this the success message reports a file that was never written
            print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
        except Exception as e:
            print(f'{prefix} export failure: {e}')

    def run(self):
        """Load the checkpoint, prepare a dummy input, and run every export named in ``include``."""
        t = time.time()
        include = [x.lower() for x in self.include]
        img_size = self._expand_img_size(self.img_size)  # expand
        file = Path(self.weights)

        # Load PyTorch model
        device = select_device(self.device)
        assert not (device.type == 'cpu' and self.half), '--half only compatible with GPU export, i.e. use --device 0'
        model = attempt_load(self.weights, map_location=device)  # load FP32 model
        names = model.names

        # Input
        gs = int(max(model.stride))  # grid size (max stride)
        img_size = [check_img_size(x, gs) for x in img_size]  # verify img_size are gs-multiples
        img = torch.zeros(self.batch_size, 3, *img_size).to(device)  # image size(1,3,320,192) iDetection

        # Update model
        if self.half:
            img, model = img.half(), model.half()  # to FP16
        model.train() if self.train else model.eval()  # training mode = no Detect() layer grid construction
        for k, m in model.named_modules():
            if isinstance(m, Conv):  # assign export-friendly activations
                if isinstance(m.act, nn.Hardswish):
                    m.act = Hardswish()
                elif isinstance(m.act, nn.SiLU):
                    m.act = SiLU()
            elif isinstance(m, Detect):
                m.inplace = self.inplace
                m.onnx_dynamic = self.dynamic
                # m.forward = m.forward_export  # assign forward (optional)

        for _ in range(2):
            y = model(img)  # dry runs
        print(f"\n{colorstr('PyTorch:')} starting from {self.weights} ({file_size(self.weights):.1f} MB)")

        # Exports
        if 'torchscript' in include:
            self.export_torchscript(model, img, file, self.optimize)
        if 'onnx' in include:
            self.export_onnx(model, img, file, self.opset_version, self.train, self.dynamic, self.simplify)
        if 'coreml' in include:
            self.export_coreml(model, img, file)

        # Finish
        print(f'\nExport complete ({time.time() - t:.2f}s). Visualize with https://github.com/lutzroeder/netron.')
class YOLOv5:
    """Wrapper that builds a YOLOv5 model on construction and forwards inputs to it.

    The constructor stores its arguments under the same names and then assigns the result of
    ``_create()`` to ``self.model``.
    """

    def __init__(self, name='yolov5s', pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
        self.name = name
        self.pretrained = pretrained
        self.channels = channels
        self.classes = classes
        self.autoshape = autoshape
        self.verbose = verbose
        self.device = device
        self.model = self._create()

    def _create(self):
        """Build the model described by the instance attributes and move it to the device.

        With ``pretrained`` set and the default 3 channels / 80 classes the checkpoint is loaded
        directly. Otherwise a model is built from ``models/**/<name>.yaml`` and, if
        ``pretrained``, the checkpoint tensors whose shapes match are copied in.

        Raises:
            Exception: any failure during device selection or model loading is re-raised with a
                cache hint, chained to the original error.
        """
        from pathlib import Path

        from models.yolo import Model, attempt_load
        from utils.general import check_requirements, set_logging
        from utils.google_utils import attempt_download
        from utils.torch_utils import select_device

        file = Path(__file__).absolute()
        check_requirements(requirements=file.parent / 'requirements.txt', exclude=('tensorboard', 'thop', 'opencv-python'))

        save_dir = Path('') if str(self.name).endswith('.pt') else file.parent
        path = (save_dir / self.name).with_suffix('.pt')  # checkpoint path
        try:
            device = select_device(('0' if torch.cuda.is_available() else 'cpu') if self.device is None else self.device)

            if self.pretrained and self.channels == 3 and self.classes == 80:
                model = attempt_load(path, map_location=device)  # download/load FP32 model
            else:
                # Only build from the yaml when the stock checkpoint cannot be used as-is;
                # without this `else` the loaded model would always be overwritten.
                cfg = list((Path(__file__).parent / 'models').rglob(f'{self.name}.yaml'))[0]  # model.yaml path
                model = Model(cfg, self.channels, self.classes)  # create model
                if self.pretrained:
                    ckpt = torch.load(attempt_download(path), map_location=device)  # load
                    msd = model.state_dict()  # model state_dict
                    csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
                    csd = {k: v for k, v in csd.items() if msd[k].shape == v.shape}  # filter
                    model.load_state_dict(csd, strict=False)  # load
                    if len(ckpt['model'].names) == self.classes:
                        model.names = ckpt['model'].names  # set class names attribute
            if self.autoshape:
                model = model.autoshape()  # for file/URI/PIL/cv2/np inputs and NMS
            return model.to(device)

        except Exception as e:
            help_url = 'https://github.com/ultralytics/yolov5/issues/36'
            s = 'Cache may be out of date, try `force_reload=True`. See %s for help.' % help_url
            raise Exception(s) from e

    def inference(self, imgs):
        """Call the wrapped model on ``imgs`` and return its result unchanged."""
        return self.model(imgs)
if __name__ == '__main__':
    # Demo: build the default model and run it once on a batch that mixes the input kinds
    # listed below.
    model = YOLOv5(name='yolov5s', pretrained=True, channels=3, classes=80, autoshape=True, verbose=True)
    imgs = ['data/images/zidane.jpg',  # filename
            'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg',  # URI
            cv2.imread('data/images/bus.jpg')[:, :, ::-1],  # OpenCV
            Image.open('data/images/bus.jpg'),  # PIL
            np.zeros((320, 640, 3))]  # numpy
    results = model.inference(imgs)
上述代码是一个模块,可以通过PyTorch Hub加载和使用YOLOv5模型。
class ImageToVideoConverter:
    """Assemble the images of one folder into an MP4 video.

    Attributes:
        input_folder: Folder searched (non-recursively) for images.
        output_file: Path of the video to write.
        frame_size: Size every image is resized to; also passed to the video writer.
        fps: Frame rate passed to the video writer.
    """

    def __init__(self, input_folder='./image', output_file='./output.mp4', frame_size=(960, 540), fps=30):
        self.input_folder = input_folder
        self.output_file = output_file
        self.frame_size = frame_size
        self.fps = fps

    def convert(self):
        """Resize each image in ``input_folder`` to ``frame_size`` and append it to ``output_file``."""
        image_extensions = ["*.png", "*.PNG", "*.JPG", "*.JPEG", "*.jpg", "*.jpeg", "*.bmp"]
        image_files = []
        for ext in image_extensions:
            image_files.extend(glob.glob(os.path.join(self.input_folder, ext)))
        # On case-insensitive file systems "*.png" and "*.PNG" match the same files, and glob
        # order is arbitrary: de-duplicate and sort so each image is written once, in name order.
        image_files = sorted(set(image_files))

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(self.output_file, fourcc, self.fps, self.frame_size)
        try:
            for image_file in image_files:
                img = cv2.imread(image_file)
                if img is None:
                    continue  # imread returns None for unreadable files; resize would raise on it
                img_resized = cv2.resize(img, self.frame_size)
                out.write(img_resized)
        finally:
            out.release()  # finalize the container even if a frame fails
# Converter for ./images -> ./output.mp4 (default frame size and fps).
# NOTE(review): convert() is not invoked anywhere in the visible excerpt -- confirm a call follows.
converter = ImageToVideoConverter(input_folder='./images', output_file='./output.mp4')
class ImageProcessor:
    """Run a detection model on one image tensor, optionally tile by tile, then apply NMS.

    The constructor stores its arguments unchanged under the same names.
    NOTE(review): ``augment`` is stored but never read in the visible code.
    """

    def __init__(self, model, img, augment, save_dir, path, visualize):
        self.model = model
        self.img = img
        self.augment = augment
        self.save_dir = save_dir
        self.path = path
        self.visualize = visualize

    def process_image(self):
        """Return NMS-filtered predictions for ``self.img``.

        With ``mulpicplus == "1"`` the whole image goes through the model once. Otherwise the
        image is cut into ``mulpicplus x mulpicplus`` overlapping tiles along dims 2 and 3
        (each 1.2x the nominal tile size, clipped to the image and trimmed to a multiple of
        64), every tile is inferred separately, the first two prediction channels are shifted
        by the tile origin, and the tile predictions are concatenated along dim 1.

        NOTE(review): assumes ``self.img`` is laid out as (batch, channels, H, W) and that
        prediction channels 0 and 1 are the box centre -- TODO confirm.
        NOTE(review): ``conf_thres``, ``iou_thres``, ``classes``, ``agnostic_nms`` and
        ``max_det`` are not defined in this class; they must exist at module level.
        """
        mulpicplus = "3"  # 1 for normal,2 for 4pic plus,3 for 9pic plus and so on
        assert (int(mulpicplus) >= 1)
        if mulpicplus == "1":
            pred = self.model(self.img,
                              visualize=increment_path(self.save_dir / Path(self.path).stem, mkdir=True) if self.visualize else False)[0]
        else:
            xsz = self.img.shape[2]
            ysz = self.img.shape[3]
            mulpicplus = int(mulpicplus)
            # Tiles are 20% larger than an even split so neighbouring tiles overlap.
            x_smalloccur = int(xsz / mulpicplus * 1.2)
            y_smalloccur = int(ysz / mulpicplus * 1.2)
            for i in range(mulpicplus):
                x_startpoint = int(i * (xsz / mulpicplus))
                for j in range(mulpicplus):
                    y_startpoint = int(j * (ysz / mulpicplus))
                    x_real = min(x_startpoint + x_smalloccur, xsz)
                    y_real = min(y_startpoint + y_smalloccur, ysz)
                    # Trim each tile side down to a multiple of 64.
                    if (x_real - x_startpoint) % 64 != 0:
                        x_real = x_real - (x_real - x_startpoint) % 64
                    if (y_real - y_startpoint) % 64 != 0:
                        y_real = y_real - (y_real - y_startpoint) % 64
                    dicsrc = self.img[:, :, x_startpoint:x_real, y_startpoint:y_real]
                    pred_temp = self.model(dicsrc,
                                           visualize=increment_path(self.save_dir / Path(self.path).stem, mkdir=True) if self.visualize else False)[0]
                    # Shift tile-local coordinates back into the full image
                    # (channel 0 moves along dim 3, channel 1 along dim 2).
                    pred_temp[..., 0] = pred_temp[..., 0] + y_startpoint
                    pred_temp[..., 1] = pred_temp[..., 1] + x_startpoint
                    if i == 0 and j == 0:
                        pred = pred_temp
                    else:
                        pred = torch.cat([pred, pred_temp], dim=1)

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        return pred
class YOLOv5Trainer:
    """Training driver that keeps the hyperparameters, run options and target device."""

    def __init__(self, hyp, opt, device):
        """Store the training inputs unchanged.

        Args:
            hyp: Hyperparameters -- either a dict or a path string to a YAML file.
            opt: Options object read by attribute (``opt.save_dir``, ``opt.epochs``, ...).
            device: Device object; its ``type`` attribute is compared with ``'cpu'``.
        """
        self.hyp = hyp
        self.opt = opt
        self.device = device
def train(self):
save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, = \
self.opt.save_dir, self.opt.epochs, self.opt.batch_size, self.opt.weights, self.opt.single_cls, \
self.opt.evolve, self.opt.data, self.opt.cfg, self.opt.resume, self.opt.noval, self.opt.nosave, \
# Directories
save_dir = Path(save_dir)
wdir = save_dir / 'weights'
wdir.mkdir(parents=True, exist_ok=True) # make dir
last = wdir / 'last.pt'
best = wdir / 'best.pt'
results_file = save_dir / 'results.txt'
# Hyperparameters
if isinstance(self.hyp, str):
with open(self.hyp) as f:
self.hyp = yaml.safe_load(f) # load hyps dict
LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in self.hyp.items()))
# Save run settings
with open(save_dir / 'hyp.yaml', 'w') as f:
yaml.safe_dump(self.hyp, f, sort_keys=False)
with open(save_dir / 'opt.yaml', 'w') as f:
yaml.safe_dump(vars(self.opt), f, sort_keys=False)
# Configure
plots = not evolve # create plots
cuda = self.device.type != 'cpu'
init_seeds(1 + RANK)
with open(data) as f:
data_dict = yaml.safe_load(f) # data dict
# Loggers
loggers = {'wandb': None, 'tb': None} # loggers dict
if RANK in [-1, 0]:
# TensorBoard
if not evolve:
prefix = colorstr('tensorboard: ')
LOGGER.info(f"{prefix}Start with 'tensorboard --logdir {self.opt.project}', view at http://localhost:6006/")
loggers['tb'] = SummaryWriter(str(save_dir))
# W&B
self.opt.hyp = self.hyp # add hyperparameters
run_id = torch.load(weights).get('wandb_id') if weights.endswith('.pt') and os.path.isfile(weights) else None
run_id = run_id if self.opt.resume else None # start fresh run if transfer learning
wandb_logger = WandbLogger(self.opt, save_dir.stem, run_id, data_dict)
loggers['wandb'] = wandb_logger.wandb
if loggers['wandb']:
data_dict = wandb_logger.data_dict
weights, epochs, self.hyp = self.opt.weights, self.opt.epochs, self.opt.hyp # may update weights, epochs if resuming
nc = 1 if single_cls else int(data_dict['nc']) # number of classes
names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names
assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, data) # check
is_coco = data.endswith('coco.yaml') and nc == 80 # COCO dataset
# Model
pretrained = weights.endswith('.pt')
if pretrained:
with torch_distributed_zero_first(RANK):
weights = attempt_download(weights) # download if not found locally
ckpt = torch.load(weights, map_location=self.device) # load checkpoint
model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=self.hyp.get('anchors')).to(self.device) # create
exclude = ['anchor'] if (cfg or self.hyp.get('anchors')) and not resume else [] # exclude keys
state_dict = ckpt['model'].float().state_dict() # to FP32
state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect
model.load_state_dict(state_dict, strict=False) # load
LOGGER.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report
model = Model(cfg, ch=3, nc=nc, anchors=self.hyp.get('anchors')).to(self.device) # create
with torch_distributed_zero_first(RANK):
check_dataset(data_dict) # check
train_path = data_dict['train']
val_path = data_dict['val']
# Freeze
freeze = [] # parameter names to freeze (full or partial)
for k, v in model.named_parameters():
v.requires_grad = True # train all layers
if any(x in k for x in freeze):
print('freezing %s' % k)
v.requires_grad = False
# Optimizer
nbs = 64 # nominal batch size
accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing
self.hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay
LOGGER.info(f"Scaled weight_decay = {self.hyp['weight_decay']}")
pg0, pg1, pg2 = [], [], [] # optimizer parameter groups
for k, v in model.named_modules():
if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
pg2.append(v.bias) # biases
if isinstance(v, nn.BatchNorm2d):
pg0.append(v.weight) # no decay
elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
pg1.append(v.weight) # apply decay
if self.opt.adam:
optimizer = optim.Adam(pg0, lr=self.hyp['lr0'], betas=(self.hyp['momentum'], 0.999)) # adjust beta1 to momentum
optimizer = optim.SGD(pg0, lr=self.hyp['lr0'], momentum=self.hyp['momentum'], nesterov=True)
optimizer.add_param_group({'params': pg1,
YOLO_mark 的标注界面如图4.2所示,标注过程中只需用矩形框将目标物体框在框内即可,尽可能准确、不留背景地锁住目标物体,就可以得到一个有效的标签。至此,标签格式如图所示:
上述的三种方法都是最为基础的数据增广方式,而当样本中含有多类别时,简单的处理方式可能会在训练样本中加入过多的无效样本,反而使网络鲁棒性下降。由此又提出了cutmix[54]的数据增广方法。顾名思义,cutmix就是将图片先裁剪再融合,具体操作就是在某张特征图上随机生成一个裁剪区域,然后将另一张图片同一位置的裁剪部分补充到特征图的空白区域,最终得到的分类结果,按一定比例分配。cutmix 数据增广方式有如下几个优点:(1)cutmix数据增广方式没有补0操作,因而并不会存在无效的非像素信息出现在训练过程中,能够有效提高训练效率;(2)cutmix要求网络学习的重点集中到目标的局部信息,进一步增强网络的定位能力;(3)合成的训练样本图像中的目标物体并不会出现不自然的情况,能够有效提高网络模型的分类能力。
但cutmix数据增广方式对于单类别的数据集并没有有效提升,因此本文使用的数据增广方式为cutmix 的增强版本,Mosaic数据增广。Mosaic数据增广[55]利用了来自原始训练样本中的四张图片,将四张图片进行拼接后,就会得到一张新的训练样本数据,同时还能获得这四张图片中的目标框位置信息。Mosaic数据增广后的图像如图所示:
Epoch gpu_mem box obj cls labels img_size
1/200 20.8G 0.01576 0.01955 0.007536 22 1280: 100%|██████████| 849/849 [14:42<00:00, 1.04s/it]
Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|██████████| 213/213 [01:14<00:00, 2.87it/s]
all 3395 17314 0.994 0.957 0.0957 0.0843
Epoch gpu_mem box obj cls labels img_size
2/200 20.8G 0.01578 0.01923 0.007006 22 1280: 100%|██████████| 849/849 [14:44<00:00, 1.04s/it]
Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|██████████| 213/213 [01:12<00:00, 2.95it/s]
all 3395 17314 0.996 0.956 0.0957 0.0845
Epoch gpu_mem box obj cls labels img_size
3/200 20.8G 0.01561 0.0191 0.006895 27 1280: 100%|██████████| 849/849 [10:56<00:00, 1.29it/s]
Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|██████████| 213/213 [00:52<00:00, 4.04it/s]
all 3395 17314 0.996 0.957 0.0957 0.0845
# YOLOv5 model definition with four detection scales (P2, P3, P4, P5).
# Restored the `anchors:`, `backbone:` and `head:` keys and the closing brackets that the
# excerpt had lost; layer entries and anchor values are unchanged.
# The N*N notes look like feature-map sizes for a 640x640 input -- TODO confirm.

# parameters
nc: 1  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors (one row per detection scale, in the order the Detect layer lists its inputs)
anchors:
  - [5,6, 8,14, 15,11]  # P2/4
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, BottleneckCSP, [128]],  # 2 160*160
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, BottleneckCSP, [256]],  # 4 80*80
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, BottleneckCSP, [512]],  # 6 40*40
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],  # 8
   [-1, 3, BottleneckCSP, [1024, False]],  # 9 20*20
  ]

# YOLOv5 head
head:
  [[-1, 1, Conv, [512, 1, 1]],  # 10 20*20
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],  # 11 40*40
   [[-1, 6], 1, Concat, [1]],  # 12 cat backbone P4 40*40
   [-1, 3, BottleneckCSP, [512, False]],  # 13 40*40
   [-1, 1, Conv, [512, 1, 1]],  # 14 40*40
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],  # 15 80*80
   [[-1, 4], 1, Concat, [1]],  # 16 cat backbone P3 80*80
   [-1, 3, BottleneckCSP, [512, False]],  # 17 80*80 (not a Detect input in this layout)
   [-1, 1, Conv, [256, 1, 1]],  # 18 80*80
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],  # 19 160*160
   [[-1, 2], 1, Concat, [1]],  # 20 cat backbone P2 160*160
   [-1, 3, BottleneckCSP, [256, False]],  # 21 (P2/4) 160*160
   [-1, 1, Conv, [256, 3, 2]],  # 22 80*80
   [[-1, 18], 1, Concat, [1]],  # 23 cat head P3 80*80
   [-1, 3, BottleneckCSP, [256, False]],  # 24 (P3/8-small) 80*80
   [-1, 1, Conv, [256, 3, 2]],  # 25 40*40
   [[-1, 14], 1, Concat, [1]],  # 26 cat head P4 40*40
   [-1, 3, BottleneckCSP, [512, False]],  # 27 (P4/16-medium) 40*40
   [-1, 1, Conv, [512, 3, 2]],  # 28 20*20
   [[-1, 10], 1, Concat, [1]],  # 29 cat head P5 20*20
   [-1, 3, BottleneckCSP, [1024, False]],  # 30 (P5/32-large) 20*20
   [[21, 24, 27, 30], 1, Detect, [nc, anchors]],  # Detect(P2, P3, P4, P5)
  ]
深度学习算法中并不需要人工选择目标所在区域,因此为了解决如何自主寻找候选区域的问题,选择预设一组或多组不同尺度不同位置的锚点框,这样只需要留下与锚点框交并比大于预设阈值的预测框就可以得到目标信息,YOLOv5s的anchor 设置如图所示:
虽然YOLOv5s的准确率和召回率并不低,但由于YOLOv5s网络 anchors的设置具有泛用性,适用于大部分数据集的目标尺度,并没有完全将特征提取能力聚焦到小尺度上,所以接下来对网络的改进将针对anchors,使网络尽可能地锁定到实际目标所在的高层语义信息。在上文中提到,YOLOv5网络的检测头接在PANet后,因此需要在Head输出层PAN结构部分增加一层高分辨率网络,鉴于PANet的双塔结构,还需要在FPN层补充一次下采样过程,使网络模型能更准确细致地保留且学习到弱小目标的语义特征信息。增加高分辨率小尺度检测头后,网络的锚点信息如图所示:
针对红外弱小目标的小尺度检测头添加完成,训练参数与上一次实验的参数对齐,在Ubuntu 18.04操作系统下进行,软件平台为PyCharm,训练与验证框架为PyTorch 1.6.0。使用的CUDA版本为10.1,Python 版本为3.6,CPU为i9 9900k,GPU为两张NVIDIA RTX 2080 Ti。Batchsize设为8,输入图像大小为1920*1080,初始学习率采用0.01,训练100个epoch,直至训练loss如图所示时训练结束。