Chinese traffic sign detection dataset (CCTSDB), Baidu Netdisk: https://pan.baidu.com/s/1-se8J8fQ0FgmUalu8873CQ,
Extraction code: 9fov
1. Generate a square white canvas whose side length equals the longer edge of the image
2. Paste the original image onto the canvas
3. Resize the image from step 2 to 416x416 (a sketch of this preprocessing follows below)
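A minimal sketch of this preprocessing with PIL (the function name make_square is an assumption for illustration, not from the original code):
from PIL import Image

def make_square(img):
    # Side of the square canvas = the longer edge of the image
    side = max(img.size)
    # Step 1: white square canvas
    canvas = Image.new("RGB", (side, side), (255, 255, 255))
    # Step 2: paste the original image onto the canvas
    canvas.paste(img, (0, 0))
    # Step 3: resize the padded square to the 416x416 network input
    return canvas.resize((416, 416))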
Because the YOLOv3 network downsamples by 32x, 16x, and 8x, it outputs three feature maps of size 13x13, 26x26, and 52x52, so the labels also have to be built in three matching versions.
For the 32x case, a ground-truth center is scaled onto the 13x13 grid: the integer part of the scaled coordinate selects the grid cell and the fractional part becomes the offset within that cell; the 16x and 8x cases work the same way.
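A quick worked example of this cell/offset mapping with math.modf, which the dataset code below relies on (the coordinate value here is made up for illustration):
import math

# Ground-truth center cx = 200 px on a 416-px input, mapped onto the 13x13 grid:
# 200 * 13 / 416 = 6.25 -> cell column 6, offset 0.25
cx_offset, cx_index = math.modf(200 * 13 / 416)
print(cx_index, cx_offset)  # 6.0 0.25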
Code implementation:
# cfg configuration
IMG_H = 416
IMG_W = 416
CLS_NUM = 6
# Anchor boxes (w, h) for each feature-map size
ANCHORS_GROUP = {
    13: [[116, 90], [156, 198], [373, 326]],
    26: [[30, 61], [62, 45], [59, 119]],
    52: [[10, 13], [16, 30], [33, 23]]
}
# Pre-computed anchor areas, used for the confidence target below
ANCHORS_GROUP_AREA = {
    13: [x * y for x, y in ANCHORS_GROUP[13]],
    26: [x * y for x, y in ANCHORS_GROUP[26]],
    52: [x * y for x, y in ANCHORS_GROUP[52]]
}
from torch.utils.data import Dataset
import torchvision
import numpy as np
import cfg
import os
from PIL import Image
import math

label_path = r"label_position.txt"
img_path = r"YOLO_V3"

def one_hot(cls_num, v):
    # Build a one-hot vector of length cls_num with a 1 at index v
    b = np.zeros(cls_num)
    b[v] = 1
    return b

class MyDataset(Dataset):
    def __init__(self):
        self.m = torchvision.transforms.ToTensor()
        with open(label_path) as f:
            self.dataset = f.readlines()

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        labels = {}
        line = self.dataset[index]  # one line of the label txt file
        strs = line.split()  # split the line into a list of fields
        img_data_ = Image.open(os.path.join(img_path, strs[0]))
        img_data = self.m(img_data_)
        boxes_ = np.array([float(x) for x in strs[1:]])
        boxes = np.split(boxes_, len(boxes_) // 5)  # group every five numbers into one box
        for feature_size, anchors in cfg.ANCHORS_GROUP.items():
            labels[feature_size] = np.zeros(shape=(feature_size, feature_size, 3, 5 + cfg.CLS_NUM))
            for box in boxes:
                cls, cx, cy, w, h = box
                # modf returns the fractional and integer parts: the integer
                # part picks the grid cell, the fraction is the offset
                cx_offset, cx_index = math.modf(cx * feature_size / cfg.IMG_W)
                cy_offset, cy_index = math.modf(cy * feature_size / cfg.IMG_H)
                for i, anchor in enumerate(anchors):
                    anchor_area = cfg.ANCHORS_GROUP_AREA[feature_size][i]  # area of this anchor box
                    p_w, p_h = w / anchor[0], h / anchor[1]
                    p_area = w * h
                    # Area ratio as a cheap IoU substitute, used as the confidence target
                    iou = min(p_area, anchor_area) / max(p_area, anchor_area)
                    labels[feature_size][int(cy_index), int(cx_index), i] = np.array(
                        [iou, cx_offset, cy_offset, np.log(p_w), np.log(p_h), *one_hot(cfg.CLS_NUM, int(cls))])
        return labels[13], labels[26], labels[52], img_data
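A quick sanity check of the dataset output shapes (a sketch; it assumes label_position.txt and the image folder exist at the paths above and that the images were preprocessed to 416x416):
from torch.utils.data import DataLoader

data = MyDataset()
loader = DataLoader(data, batch_size=1, shuffle=True)
target_13, target_26, target_52, img = next(iter(loader))
print(target_13.shape)  # torch.Size([1, 13, 13, 3, 11]) with CLS_NUM = 6
print(img.shape)        # torch.Size([1, 3, 416, 416]) for a 416x416 RGB image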
Design of the loss function: mean squared error
loss = loss_obj + loss_noobj
(IoU-based confidence tells us which grid cells contain an object center and which do not; the two groups are penalized separately, and cells without an object only incur a loss on their confidence)
sum_loss = loss_13 + loss_26 + loss_52
(because the network outputs three feature maps, there are three corresponding losses)
Code implementation:
import dataset
import DarkNet_53
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

def loss_fn(output, target):
    # Reshape the network output from (N, C, H, W) to (N, H, W, 3, 5 + CLS_NUM)
    output = output.permute(0, 2, 3, 1)
    output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)
    target = target.float()  # target is already on the right device; just cast the dtype
    mask_obj = target[..., 0] > 0
    mask_noobj = target[..., 0] == 0
    # Cells with an object are penalized on all channels,
    # cells without an object only on the confidence channel
    loss_obj = torch.mean((output[mask_obj] - target[mask_obj]) ** 2)
    loss_noobj = torch.mean((output[mask_noobj][:, 0] - target[mask_noobj][:, 0]) ** 2)
    loss = loss_obj + loss_noobj
    return loss
if __name__ == '__main__':
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    myDataset = dataset.MyDataset()
    train_loader = DataLoader(myDataset, batch_size=1, shuffle=True)
    save_para_path = r"net_3.pt"
    net = DarkNet_53.MainNet().to(device)
    net.train()
    opt = torch.optim.Adam(net.parameters(), lr=0.001)
    count = 0
    # net.load_state_dict(torch.load(save_para_path))
    while True:
        for i, (target_13, target_26, target_52, img_data) in enumerate(train_loader):
            target_13 = target_13.to(device)
            target_26 = target_26.to(device)
            target_52 = target_52.to(device)
            img_data = img_data.to(device)
            torch.cuda.empty_cache()
            output_13, output_26, output_52 = net(img_data)
            torch.cuda.empty_cache()
            # One loss per output scale, summed into the total loss
            loss_13 = loss_fn(output_13, target_13)
            loss_26 = loss_fn(output_26, target_26)
            loss_52 = loss_fn(output_52, target_52)
            loss = loss_13 + loss_26 + loss_52
            opt.zero_grad()
            loss.backward()
            opt.step()
            print(loss.item())
            if i % 1000 == 0 and i > 0:
                torch.save(net.state_dict(), save_para_path)
        count = count + 1
        print("Epoch {0} finished".format(count))
        torch.save(net.state_dict(), save_para_path)
        print("Saved successfully")
Notes for testing:
1. First make the test image's width and height multiples of 32
2. Set a confidence threshold
3. Keep the boxes and classes whose confidence exceeds the threshold; note that these boxes come from all nine anchors (three scales, each with three anchor shapes)
4. Finally, apply non-maximum suppression to the boxes of each class separately
(If you are not familiar with non-maximum suppression, see: https://mp.csdn.net/mdeditor/103183879)
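The detection code below imports nms from a utils module that is not shown in this post. Here is a minimal sketch of what such a helper could look like, assuming each box is a (conf, x1, y1, x2, y2, ...) row as produced by _parse below:
import torch

def nms(boxes, thresh):
    # Greedy non-maximum suppression on (conf, x1, y1, x2, y2, ...) rows
    if boxes.size(0) == 0:
        return boxes
    boxes = boxes[boxes[:, 0].argsort(descending=True)]  # highest confidence first
    keep = []
    while boxes.size(0) > 0:
        best = boxes[0]
        keep.append(best)
        rest = boxes[1:]
        if rest.size(0) == 0:
            break
        # Intersection of the kept box with all remaining boxes
        x1 = torch.max(best[1], rest[:, 1])
        y1 = torch.max(best[2], rest[:, 2])
        x2 = torch.min(best[3], rest[:, 3])
        y2 = torch.min(best[4], rest[:, 4])
        inter = (x2 - x1).clamp(min=0) * (y2 - y1).clamp(min=0)
        area_best = (best[3] - best[1]) * (best[4] - best[2])
        area_rest = (rest[:, 3] - rest[:, 1]) * (rest[:, 4] - rest[:, 2])
        iou = inter / (area_best + area_rest - inter)
        # Drop the boxes that overlap the kept box beyond the threshold
        boxes = rest[iou <= thresh]
    return torch.stack(keep)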
Code implementation:
import DarkNet_53
import torch.nn as nn
import torch
import cfg
import PIL.Image as pimg
from PIL import ImageDraw
from torchvision import transforms
from utils import nms
import numpy as np
import os

path = r"net_3.pt"

def resize(img):
    # Round the width and height down to multiples of 32
    w, h = img.size
    image = img.resize(((w // 32) * 32, (h // 32) * 32))
    return image

class Detector(nn.Module):
    def __init__(self):
        super(Detector, self).__init__()
        self.m = transforms.ToTensor()
        self.net = DarkNet_53.MainNet().cuda()
        self.net.load_state_dict(torch.load(path))
        self.net.eval()

    def forward(self, input, thresh, anchors):
        input = self.m(input).cuda()
        input.unsqueeze_(0)
        output_13, output_26, output_52 = self.net(input)
        output_13 = output_13.cpu().detach()
        output_26 = output_26.cpu().detach()
        output_52 = output_52.cpu().detach()
        # Each scale uses its own stride (32, 16, 8) and its own anchor group
        idxs_13, vecs_13 = self._filter(output_13, thresh)
        boxes_13, cls_13 = self._parse(idxs_13, vecs_13, 32, anchors[13])
        idxs_26, vecs_26 = self._filter(output_26, thresh)
        boxes_26, cls_26 = self._parse(idxs_26, vecs_26, 16, anchors[26])
        idxs_52, vecs_52 = self._filter(output_52, thresh)
        boxes_52, cls_52 = self._parse(idxs_52, vecs_52, 8, anchors[52])
        boxes = torch.cat([boxes_13, boxes_26, boxes_52], dim=0)
        cls = torch.cat([cls_13, cls_26, cls_52], dim=0)
        return boxes, cls

    def _filter(self, output, thresh):
        # Reshape (N, C, H, W) to (N, H, W, 3, 5 + CLS_NUM) and keep only
        # the cells whose confidence exceeds the threshold
        output = output.permute(0, 2, 3, 1)
        output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)
        mask = output[..., 0] > thresh
        idxs = mask.nonzero()
        vecs = output[mask]
        return idxs, vecs

    def _parse(self, idxs, vecs, t, anchors):
        if vecs.size()[0] == 0:
            return torch.tensor([]), torch.tensor([])
        else:
            anchors = torch.tensor(anchors).float()
            n = idxs[:, 0]  # image index within the batch
            a = idxs[:, 3]  # anchor index
            cond = vecs[:, 0].float()
            cls = vecs[:, 5:]
            cls_ = torch.argmax(cls, dim=1).float()
            cy = (idxs[:, 1].float() + vecs[:, 2]) * t  # center y in the input image
            cx = (idxs[:, 2].float() + vecs[:, 1]) * t  # center x in the input image
            w = anchors[a, 0] * torch.exp(vecs[:, 3])
            h = anchors[a, 1] * torch.exp(vecs[:, 4])
            x1 = (cx - w / 2).float()
            y1 = (cy - h / 2).float()
            x2 = (cx + w / 2).float()
            y2 = (cy + h / 2).float()
            return torch.stack([cond, x1, y1, x2, y2, cls_, n.float()], dim=1), cls_
if __name__ == '__main__':
    with torch.no_grad():
        j = 0
        detector = Detector()  # build the detector (and load the weights) once
        color = ["#FF7F24", "#FF0000", "#00FF00", "#0000FF", "#FFFF00", "#FF00FF"]  # one color per class
        for name in os.listdir("test"):
            image_path = os.path.join("test", name)
            image_ = pimg.open(image_path)
            image = resize(image_)
            # Draw on the resized image, since the boxes are in its coordinate system
            imgdraw = ImageDraw.Draw(image)
            boxes, cls = detector(image, 0.5, cfg.ANCHORS_GROUP)
            cls = np.array(cls)
            # Run NMS separately on the boxes of each detected class
            for i in set(cls.tolist()):
                boxes_ = [box for box in boxes if box[5] == i]
                if len(boxes_) == 0:
                    continue
                frame = nms(torch.stack(boxes_), 0.1)
                for x in frame:
                    x1, y1, x2, y2 = x[1], x[2], x[3], x[4]
                    imgdraw.rectangle((x1, y1, x2, y2), fill=None, outline=color[int(i)], width=2)
            j = j + 1
            # image.show()
            image.save("test_results/" + str(j) + ".jpg")
Recall: 84.71%; Precision: 82.40%
Note: because the dataset is not of very good quality, and YOLO is comparatively weak at detecting small objects, the recall and precision are not especially high.
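For reference, these two metrics are computed from the true positive (TP), false positive (FP), and false negative (FN) counts; a minimal sketch (the counts would come from the evaluation run, not shown here):
def precision_recall(tp, fp, fn):
    precision = tp / (tp + fp)  # fraction of predicted boxes that are correct
    recall = tp / (tp + fn)     # fraction of ground-truth boxes that were found
    return precision, recall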