yolov3项目实战——基于PyTorch实现的目标检测项目实战(附代码)

一、数据准备

数据准备见:使用精灵标注助手制作yolov3训练数据集(附解析xml代码)

本篇文章为项目实战部分,理论部分简析见:YoLov1-YoLov3演变历程(思维导图)

二、项目代码部分:

1、cfg.py

# Number of object classes the detector predicts.
CLASS_NUM = 10

# Size (in pixels) of the square network input. dataset.py reads
# cfg.IMG_WIDTH when it maps box centres onto the 13/26/52 grids; 416 is the
# input size used by the model and detection scripts (416 / 32 = 13).
IMG_HEIGHT = 416
IMG_WIDTH = 416

# Anchor boxes obtained by k-means clustering on the COCO dataset, keyed by
# feature-map size. Large feature maps have a small receptive field, small
# feature maps have a large one.
ANCHORS_GROUP_KMEANS = {
    52: [[10, 13], [16, 30], [33, 23]],
    26: [[30, 61], [62, 45], [59, 119]],
    13: [[116, 90], [156, 198], [373, 326]],
}

# Hand-picked [width, height] anchor boxes, keyed by feature-map size.
ANCHORS_GROUP = {
    13: [[360, 360], [360, 180], [180, 360]],
    26: [[180, 180], [180, 90], [90, 180]],
    52: [[90, 90], [90, 45], [45, 90]],
}

# Area of every anchor in ANCHORS_GROUP (same keys, same order). The dataset
# compares these with the ground-truth box area to build the confidence label.
ANCHORS_GROUP_AREA = {
    feature_size: [w * h for w, h in anchors]
    for feature_size, anchors in ANCHORS_GROUP.items()
}

if __name__ == '__main__':
    # Print the anchors and their areas for a quick visual check.
    for feature_size, anchors in ANCHORS_GROUP.items():
        print(feature_size)
        print(anchors)
    for feature_size, anchor_area in ANCHORS_GROUP_AREA.items():
        print(feature_size)
        print(anchor_area)

2、dataset.py

import torch
from torch.utils.data import Dataset,DataLoader
import torchvision
import numpy as np
import cfg
import os
from PIL import Image
import math

# Image preprocessing pipeline applied to every sample; ToTensor is its only
# step.
transforms = torchvision.transforms.Compose(
    [torchvision.transforms.ToTensor()]
)

def one_hot(cls_num, i):
    """Return a one-hot vector of length ``cls_num``.

    Args:
        cls_num: Total number of classes (length of the vector).
        i: Index of the class to mark with 1.

    Returns:
        A float NumPy array that is 0 everywhere except position ``i``.
    """
    encoded = np.zeros(cls_num)
    encoded[i] = 1.0
    return encoded

class MyDataset(Dataset):
    """Dataset that builds YOLOv3 training targets for the 13/26/52 grids.

    Every non-blank line of the label file holds an image file name followed
    by zero or more groups of five numbers ``cls cx cy w h`` (class index,
    box centre and box size). Coordinates are divided by ``cfg.IMG_WIDTH`` /
    ``cfg.IMG_HEIGHT``, so they are presumably pixels of the resized network
    input - confirm against the label-generation script.
    """

    def __init__(self, LABEL_FILE_PATH, IMG_BASE_DIR):
        """Read the label file.

        Args:
            LABEL_FILE_PATH: Path of the text file with one sample per line.
            IMG_BASE_DIR: Directory the image names are relative to.
        """
        self.LABEL_FILE_PATH = LABEL_FILE_PATH
        self.IMG_BASE_DIR = IMG_BASE_DIR
        with open(self.LABEL_FILE_PATH) as f:
            # Blank lines (e.g. a trailing newline) are not samples; keeping
            # them would make __getitem__ fail on strs[0].
            self.dataset = [line for line in f if line.strip()]

    def __len__(self):
        """Return the number of samples (one per non-blank label line)."""
        return len(self.dataset)

    @staticmethod
    def _grid_cell(center, image_size, feature_size):
        """Map a centre coordinate to (offset inside the cell, cell index)."""
        offset, cell = math.modf(center * feature_size / image_size)
        cell = int(cell)
        if cell >= feature_size:
            # A centre lying exactly on the far image edge would index one
            # past the grid; keep it in the last cell at its far border.
            cell, offset = feature_size - 1, 1.0
        return offset, cell

    def __getitem__(self, index):
        """Return ``(label_13, label_26, label_52, image_tensor)``.

        Each label is a float array of shape
        ``(S, S, anchors, 5 + cfg.CLASS_NUM)`` whose last axis is
        ``[confidence, cx_offset, cy_offset, log(w / anchor_w),
        log(h / anchor_h), one-hot class]``. Cells without an object stay 0.

        Raises:
            ValueError: If the numbers after the file name are not a multiple
                of five.
        """
        strs = self.dataset[index].split()
        # Close the file handle as soon as the pixels have been converted.
        with Image.open(os.path.join(self.IMG_BASE_DIR, strs[0])) as _img_data:
            img_data = transforms(_img_data)

        # One row per object; an image without objects gives a (0, 5) array
        # instead of crashing on a zero-section split.
        boxes = np.array(list(map(float, strs[1:]))).reshape(-1, 5)

        # Fall back to 416 (the input size used by the model/detect scripts)
        # when cfg does not define the image size.
        img_w = getattr(cfg, "IMG_WIDTH", 416)
        img_h = getattr(cfg, "IMG_HEIGHT", img_w)

        labels = {}
        for feature_size, anchors in cfg.ANCHORS_GROUP.items():
            labels[feature_size] = np.zeros(
                shape=(feature_size, feature_size, len(anchors), 5 + cfg.CLASS_NUM))

            for cls, cx, cy, w, h in boxes:
                # Integer part: which grid cell holds the centre.
                # Fractional part: where the centre sits inside that cell.
                cx_offset, cx_index = self._grid_cell(cx, img_w, feature_size)
                cy_offset, cy_index = self._grid_cell(cy, img_h, feature_size)
                p_area = w * h
                cls_vec = one_hot(cfg.CLASS_NUM, int(cls))

                for i, anchor in enumerate(anchors):
                    anchor_area = cfg.ANCHORS_GROUP_AREA[feature_size][i]
                    # Box size relative to the anchor; stored as a logarithm.
                    p_w, p_h = w / anchor[0], h / anchor[1]
                    # Confidence label: ratio of the smaller to the larger
                    # area. Box and anchor are treated as sharing one centre,
                    # so their relative position is ignored.
                    iou = min(p_area, anchor_area) / max(p_area, anchor_area)
                    labels[feature_size][cy_index, cx_index, i] = np.array(
                        [iou, cx_offset, cy_offset, np.log(p_w), np.log(p_h), *cls_vec])

        return labels[13], labels[26], labels[52], img_data
if __name__ == '__main__':
    # Smoke test: one-hot encoding plus the shapes produced by the loader.
    print(one_hot(10, 2))

    label_path = r".\1.txt"     # label file
    img_dir = r".\imgs"         # directory holding the training images
    data = MyDataset(label_path, img_dir)
    dataloader = DataLoader(data, 3, shuffle=True)

    # First pass: address the batch elements by position.
    for batch in dataloader:
        for tensor in batch[:4]:
            print(tensor.shape)

    # Second pass: unpack the batch into named tensors.
    for target_13, target_26, target_52, img_data in dataloader:
        for tensor in (target_13, target_26, target_52, img_data):
            print(tensor.shape)

3、FRN.py

import torch
import torch.nn as nn

class FRN(nn.Module):
    """Filter Response Normalization followed by a thresholded linear unit.

    Each channel is divided by the root of its mean squared activation over
    the spatial axes, scaled and shifted by ``gamma``/``beta``, and finally
    thresholded from below by the learned ``tau``.

    Args:
        num_features: Number of channels of the input.
        eps: Initial value of the stabilising constant in the denominator.
        learnable_eps: When True, ``eps`` is trained with the other
            parameters.
    """

    def __init__(self, num_features, eps=1e-6, learnable_eps=False):
        super().__init__()
        # One value per channel, broadcast over batch and spatial axes.
        shape = (1, num_features, 1, 1)

        # eps stays a Parameter so it is part of the state_dict either way.
        self.eps = nn.Parameter(torch.ones(*shape) * eps, requires_grad=True)
        if not learnable_eps:
            self.eps.requires_grad_(False)

        # Values are assigned in reset_parameters().
        self.gamma = nn.Parameter(torch.empty(*shape), requires_grad=True)
        self.beta = nn.Parameter(torch.empty(*shape), requires_grad=True)
        self.tau = nn.Parameter(torch.empty(*shape), requires_grad=True)
        self.reset_parameters()

    def forward(self, x):
        """Normalise ``x`` (channels on axis 1); the shape is unchanged."""
        # Average over every axis after batch and channel (H and W for 4-D).
        avg_dims = tuple(range(2, x.dim()))
        nu2 = torch.pow(x, 2).mean(dim=avg_dims, keepdim=True)
        # abs() keeps the denominator valid if a learnable eps turns negative.
        x = x / torch.sqrt(nu2 + torch.abs(self.eps))
        return torch.max(self.gamma * x + self.beta, self.tau)

    def reset_parameters(self):
        """Initialise gamma to 1 and beta/tau to 0.

        With a zero shift and a zero threshold the layer starts out as a
        normalised ReLU; initialising beta and tau to 1 would clamp every
        output at 1 from the first step.
        """
        nn.init.ones_(self.gamma)
        nn.init.zeros_(self.beta)
        nn.init.zeros_(self.tau)


if __name__ == '__main__':
    # Smoke test: normalisation changes the values but keeps the shape,
    # so this prints torch.Size([10, 16, 224, 224]).
    x = torch.rand(10, 16, 224, 224)
    frn = FRN(16)
    normalised = frn(x)
    print(normalised.shape)

4、model.py(train.py 和 detect.py 均通过 from model import * 导入本文件,因此文件名应为 model.py)

import torch
import torch.nn.functional as F

class UpsampleLayer(torch.nn.Module):
    """Upsampling layer: doubles H and W with nearest-neighbour interpolation."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        upsampled = F.interpolate(x, scale_factor=2, mode='nearest')
        return upsampled

class ConvolutionalLayer(torch.nn.Module):
    """Basic convolution block: Conv2d -> BatchNorm2d -> LeakyReLU(0.1).

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by the convolution.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Zero padding added on each side.
        bias: Whether the convolution has a bias term (off by default).
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias=False):
        super().__init__()

        conv = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=bias)
        # Other normalisations that could replace BatchNorm2d here:
        # InstanceNorm2d(out_channels), GroupNorm(8, out_channels) or FRN.
        norm = torch.nn.BatchNorm2d(out_channels)
        activation = torch.nn.LeakyReLU(0.1)
        self.sub_module = torch.nn.Sequential(conv, norm, activation)

    def forward(self, x):
        out = self.sub_module(x)
        return out

class ResidualLayer(torch.nn.Module):
    """Residual block: 1x1 bottleneck and 3x3 convolution added to the input.

    The 1x1 convolution halves the channel count and the 3x3 convolution
    restores it, so the result can be summed with the input.
    """

    def __init__(self, in_channels):
        super().__init__()

        bottleneck = in_channels // 2
        self.sub_module = torch.nn.Sequential(
            ConvolutionalLayer(in_channels, bottleneck, 1, 1, 0),   # 1x1 conv
            ConvolutionalLayer(bottleneck, in_channels, 3, 1, 1),   # 3x3 conv
        )

    def forward(self, x):
        residual = self.sub_module(x)
        return x + residual

class DownsamplingLayer(torch.nn.Module):
    """Downsampling layer: one 3x3 convolution block with stride 2."""

    def __init__(self, in_channels, out_channels):
        super().__init__()

        strided_conv = ConvolutionalLayer(in_channels, out_channels, 3, 2, 1)
        self.sub_module = torch.nn.Sequential(strided_conv)

    def forward(self, x):
        out = self.sub_module(x)
        return out

class ConvolutionalSet(torch.nn.Module):
    """Five alternating 1x1 / 3x3 convolution blocks.

    The 1x1 blocks map ``in_channels`` to ``out_channels`` and the 3x3
    blocks map back, so the set ends with ``out_channels`` channels.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        squeeze = (in_channels, out_channels, 1, 1, 0)
        expand = (out_channels, in_channels, 3, 1, 1)
        layer_specs = (squeeze, expand, squeeze, expand, squeeze)
        self.sub_module = torch.nn.Sequential(
            *[ConvolutionalLayer(*spec) for spec in layer_specs]
        )

    def forward(self, x):
        out = self.sub_module(x)
        return out

class MainNet(torch.nn.Module):
    """YOLOv3 network: residual backbone plus three detection heads.

    The backbone downsamples the input by 8, 16 and 32 (52x52, 26x26 and
    13x13 grids for a 416x416 image). Each head predicts, per grid cell,
    three anchors times ``5 + class_num`` values (confidence, four box
    values and the class scores).

    Args:
        class_num: Number of object classes. The default of 10 gives the
            original 45 output channels.
    """

    def __init__(self, class_num=10):
        super(MainNet, self).__init__()

        # 3 anchors x (1 confidence + 4 box values + class scores).
        out_channels = 3 * (5 + class_num)

        # Backbone down to stride 8 (52x52 for a 416 input).
        self.trunk_52 = torch.nn.Sequential(
            ConvolutionalLayer(3, 32, 3, 1, 1),
            DownsamplingLayer(32, 64),
            ResidualLayer(64),
            DownsamplingLayer(64, 128),
            *[ResidualLayer(128) for _ in range(2)],
            DownsamplingLayer(128, 256),
            *[ResidualLayer(256) for _ in range(8)],
        )

        # Stride 16 (26x26); its output is also routed to the 26 head.
        self.trunk_26 = torch.nn.Sequential(
            DownsamplingLayer(256, 512),
            *[ResidualLayer(512) for _ in range(8)],
        )

        # Stride 32 (13x13).
        self.trunk_13 = torch.nn.Sequential(
            DownsamplingLayer(512, 1024),
            *[ResidualLayer(1024) for _ in range(4)],
        )

        self.convset_13 = torch.nn.Sequential(
            ConvolutionalSet(1024, 512)
        )

        self.detetion_13 = torch.nn.Sequential(
            ConvolutionalLayer(512, 1024, 3, 1, 1),
            torch.nn.Conv2d(1024, out_channels, 1, 1, 0)
        )

        # Halve the channels, then double the spatial size.
        self.up_26 = torch.nn.Sequential(
            ConvolutionalLayer(512, 256, 1, 1, 0),
            UpsampleLayer()
        )

        # 768 = 256 upsampled channels + 512 channels from trunk_26.
        self.convset_26 = torch.nn.Sequential(
            ConvolutionalSet(768, 256)
        )

        self.detetion_26 = torch.nn.Sequential(
            ConvolutionalLayer(256, 512, 3, 1, 1),
            torch.nn.Conv2d(512, out_channels, 1, 1, 0)
        )

        self.up_52 = torch.nn.Sequential(
            ConvolutionalLayer(256, 128, 1, 1, 0),
            UpsampleLayer()
        )

        # 384 = 128 upsampled channels + 256 channels from trunk_52.
        self.convset_52 = torch.nn.Sequential(
            ConvolutionalSet(384, 128)
        )

        self.detetion_52 = torch.nn.Sequential(
            ConvolutionalLayer(128, 256, 3, 1, 1),
            torch.nn.Conv2d(256, out_channels, 1, 1, 0)
        )

    def forward(self, x):
        """Return the raw head outputs ``(out_13, out_26, out_52)``.

        Each output has layout ``[N, 3 * (5 + class_num), S, S]``.
        """
        # Backbone features at the three scales.
        h_52 = self.trunk_52(x)
        h_26 = self.trunk_26(h_52)
        h_13 = self.trunk_13(h_26)

        # The 13x13 branch feeds both its detection head and the upsampling
        # path to the 26x26 branch.
        convset_out_13 = self.convset_13(h_13)
        detetion_out_13 = self.detetion_13(convset_out_13)

        # Concatenate upsampled and backbone features on the channel axis.
        up_out_26 = self.up_26(convset_out_13)
        route_out_26 = torch.cat((up_out_26, h_26), dim=1)
        convset_out_26 = self.convset_26(route_out_26)
        detetion_out_26 = self.detetion_26(convset_out_26)

        up_out_52 = self.up_52(convset_out_26)
        route_out_52 = torch.cat((up_out_52, h_52), dim=1)
        convset_out_52 = self.convset_52(route_out_52)
        detetion_out_52 = self.detetion_52(convset_out_52)

        return detetion_out_13, detetion_out_26, detetion_out_52

# Smoke test of the network.
if __name__ == '__main__':
    net = MainNet()
    x = torch.randn([2, 3, 416, 416], dtype=torch.float32)
    y_13, y_26, y_52 = net(x)
    print(y_13.shape)
    print(y_26.shape)
    print(y_52.shape)

    # The labels are laid out as [N, H, W, 3, 15], so the channel axis must be
    # moved to the end BEFORE it is split into (anchor, values). Reshaping the
    # NCHW tensor directly gives the same shape but scrambled contents.
    y_13_nhwc = y_13.permute([0, 2, 3, 1])
    print(y_13_nhwc.shape)
    print(y_13_nhwc.reshape(-1, 13, 13, 3, 15).shape)

5、train.py

import dataset
from model import *
import torch
from torch.utils.data import DataLoader
import os

def loss_fn(output, target, alpha):
    """Compute the YOLOv3 loss for one detection scale.

    Args:
        output: Raw head output of shape ``[N, 3 * C, H, W]``.
        target: Label tensor of shape ``[N, H, W, 3, C]`` whose last axis is
            ``[confidence, cx_offset, cy_offset, log_w, log_h, one-hot class]``.
        alpha: Weight of the object loss; ``1 - alpha`` weights the
            no-object confidence loss.

    Returns:
        A scalar tensor on ``output``'s device.
    """
    conf_loss_fn = torch.nn.BCEWithLogitsLoss()
    crood_loss_fn = torch.nn.MSELoss()
    cls_loss_fn = torch.nn.CrossEntropyLoss()

    # [N, C, H, W] -> [N, H, W, C] -> [N, H, W, 3, C // 3]
    output = output.permute(0, 2, 3, 1)
    output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)
    # Bring the labels to the prediction's device and dtype instead of
    # dragging the (possibly GPU) output back to the CPU in float64.
    target = target.to(device=output.device, dtype=output.dtype)

    # Differentiable zero used when a mask selects nothing: the mean of an
    # empty tensor is NaN and would poison the whole loss.
    zero = output.sum() * 0

    mask_obj = target[..., 0] > 0           # cells/anchors that hold an object
    if mask_obj.any():
        output_obj = output[mask_obj]
        target_obj = target[mask_obj]
        # Confidence is trained against the IoU-like label.
        loss_obj_conf = conf_loss_fn(output_obj[:, 0], target_obj[:, 0])
        # Centre offsets are decoded with sigmoid at detection time, so the
        # same decoding is applied before regressing them here; the log-size
        # terms are regressed directly.
        pred_box = torch.cat(
            (torch.sigmoid(output_obj[:, 1:3]), output_obj[:, 3:5]), dim=1)
        loss_obj_crood = crood_loss_fn(pred_box, target_obj[:, 1:5])
        # CrossEntropyLoss expects class indices, not one-hot vectors.
        loss_obj_cls = cls_loss_fn(
            output_obj[:, 5:], torch.argmax(target_obj[:, 5:], dim=1))
        loss_obj = loss_obj_conf + loss_obj_crood + loss_obj_cls
    else:
        loss_obj = zero

    mask_noobj = target[..., 0] == 0        # background: only confidence is trained
    if mask_noobj.any():
        output_noobj = output[mask_noobj]
        target_noobj = target[mask_noobj]
        loss_noobj = conf_loss_fn(output_noobj[:, 0], target_noobj[:, 0])
    else:
        loss_noobj = zero

    loss = alpha * loss_obj + (1 - alpha) * loss_noobj
    return loss

if __name__ == '__main__':
    save_path = r"/net_yolo.pth"                # where the weights are stored

    train_label_path = r"\1.txt"                # training label file
    train_img_dir = r"\imgss"                   # directory of the training images

    valid_label_path = ""                       # validation label file (optional)
    valid_img_dir = r""                         # directory of the validation images

    train_Dataset = dataset.MyDataset(train_label_path, train_img_dir)
    train_loader = DataLoader(train_Dataset, batch_size=3, shuffle=True)

    # Validation is optional: an empty path would otherwise crash in open("").
    valid_loader = None
    if valid_label_path:
        valid_Dataset = dataset.MyDataset(valid_label_path, valid_img_dir)
        valid_loader = DataLoader(valid_Dataset, batch_size=3, shuffle=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = MainNet().to(device)

    if os.path.exists(save_path):
        # map_location lets weights saved on a GPU load on a CPU-only host.
        net.load_state_dict(torch.load(save_path, map_location=device))
    else:
        print("NO Param")

    opt = torch.optim.Adam(net.parameters())

    epoch = 0
    last_train_loss = 10000
    while True:
        net.train()
        for train_target_13, train_target_26, train_target_52, train_img_data in train_loader:
            train_img_data = train_img_data.to(device)
            train_output_13, train_output_26, train_output_52 = net(train_img_data)
            # alpha = 0.9 weights the object loss. Around 0.6 is a reasonable
            # start; raise it when objects cover only a small part of the
            # image, otherwise recall suffers.
            train_loss_13 = loss_fn(train_output_13, train_target_13, 0.9)
            train_loss_26 = loss_fn(train_output_26, train_target_26, 0.9)
            train_loss_52 = loss_fn(train_output_52, train_target_52, 0.9)
            train_loss = train_loss_13 + train_loss_26 + train_loss_52

            opt.zero_grad()
            train_loss.backward()
            opt.step()

            # Weights are saved only when the batch loss reaches a new minimum.
            train_loss_value = train_loss.item()
            if train_loss_value < last_train_loss:
                print("epoch:  {},train_loss: {}  损失降低了!!".format(epoch, train_loss_value))
                torch.save(net.state_dict(), save_path)
                last_train_loss = train_loss_value
            else:
                print(("epoch:  {},train_loss: {}  损失正在上升!!!!!!!!!!".format(epoch, train_loss_value)))

        if valid_loader is not None:
            # eval() freezes the BatchNorm statistics and no_grad() avoids
            # building a graph; validation must not alter the model.
            net.eval()
            with torch.no_grad():
                for valid_target_13, valid_target_26, valid_target_52, valid_img_data in valid_loader:
                    valid_img_data = valid_img_data.to(device)
                    valid_output_13, valid_output_26, valid_output_52 = net(valid_img_data)
                    valid_loss_13 = loss_fn(valid_output_13, valid_target_13, 0.9)
                    valid_loss_26 = loss_fn(valid_output_26, valid_target_26, 0.9)
                    valid_loss_52 = loss_fn(valid_output_52, valid_target_52, 0.9)
                    valid_loss = valid_loss_13 + valid_loss_26 + valid_loss_52
                    print(valid_loss)
        epoch += 1

6、tool.py

import numpy as np
import torch


def ious(box, boxes, isMin = False):
    """Intersection-over-union of one box against several boxes.

    Args:
        box: 1-D tensor with x1, y1, x2, y2 at positions 1..4.
        boxes: 2-D tensor, one box per row, with the same column layout.
        isMin: Accepted for interface compatibility; this implementation
            does not use it and always divides by the union.

    Returns:
        1-D tensor with the IoU of ``box`` and every row of ``boxes``.
    """
    reference_area = (box[3] - box[1]) * (box[4] - box[2])
    candidate_areas = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 4] - boxes[:, 2])

    # Corners of the overlap rectangle.
    left = torch.max(box[1], boxes[:, 1])
    top = torch.max(box[2], boxes[:, 2])
    right = torch.min(box[3], boxes[:, 3])
    bottom = torch.min(box[4], boxes[:, 4])

    # Negative extents mean the boxes do not overlap.
    overlap = torch.clamp(right - left, min=0) * torch.clamp(bottom - top, min=0)

    return overlap / (reference_area + candidate_areas - overlap)

def nms(boxes, thresh=0.3, isMin = True):
    """Non-maximum suppression.

    Args:
        boxes: 2-D tensor, one box per row, with the score in column 0 and
            x1, y1, x2, y2 in columns 1..4.
        thresh: A box is dropped when its overlap with an already kept box is
            not below this value.
        isMin: Forwarded to ``ious``.

    Returns:
        The kept boxes stacked into a 2-D tensor, highest score first. For
        empty input an empty slice of ``boxes`` is returned, so the result
        has the same type as in the non-empty case.
    """
    if boxes.shape[0] == 0:
        return boxes[:0]

    # Sort by score, best first.
    _boxes = boxes[(-boxes[:, 0]).argsort()]
    r_boxes = []

    while _boxes.shape[0] > 1:
        a_box = _boxes[0]
        b_boxes = _boxes[1:]
        r_boxes.append(a_box)

        # Keep only the candidates that overlap the chosen box less than
        # thresh (boolean mask; no round trip through NumPy).
        _boxes = b_boxes[ious(a_box, b_boxes, isMin) < thresh]
    if _boxes.shape[0] > 0:
        r_boxes.append(_boxes[0])

    return torch.stack(r_boxes)

if __name__ == '__main__':
    # Smoke test: run NMS on four hand-written boxes.
    sample_boxes = torch.tensor([
        [1, 1, 10, 10, 40, 8],
        [1, 1, 9, 9, 10, 9],
        [9, 8, 13, 20, 15, 3],
        [6, 11, 18, 17, 13, 2],
    ])
    kept = nms(sample_boxes)
    print(kept)

7、detect.py

from model import *
import cfg
import torch
import numpy as np
import PIL.Image as pimg
import PIL.ImageDraw as draw
from PIL import ImageFont
import tool
import os

class Detector(torch.nn.Module):
    """Runs the trained network and decodes its outputs into boxes.

    Args:
        save_path: Path of the saved ``MainNet`` state dict.
    """

    def __init__(self, save_path):
        super(Detector, self).__init__()

        # Use the GPU when there is one, otherwise fall back to the CPU.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.net = MainNet().to(device)
        self.net.load_state_dict(torch.load(save_path, map_location=device))
        self.net.eval()

    def forward(self, input, thresh, anchors):
        """Detect objects in ``input``.

        Args:
            input: Image tensor of layout ``[N, 3, H, W]``.
            thresh: Confidence threshold in (0, 1).
            anchors: Mapping with keys 13, 26 and 52, each a list of three
                ``[w, h]`` anchors.

        Returns:
            Tensor of shape ``[K, 6]`` with rows
            ``[confidence, x1, y1, x2, y2, class]``; NMS is left to the
            caller. The batch index is not part of the rows, so boxes of all
            images in the batch are mixed - pass one image at a time.
        """
        device = next(self.net.parameters()).device
        with torch.no_grad():       # inference only: no autograd graph needed
            output_13, output_26, output_52 = self.net(input.to(device))

        # The second argument of _parse is the stride of each scale.
        idxs_13, vecs_13 = self._filter(output_13, thresh)
        boxes_13 = self._parse(idxs_13, vecs_13, 32, anchors[13])
        idxs_26, vecs_26 = self._filter(output_26, thresh)
        boxes_26 = self._parse(idxs_26, vecs_26, 16, anchors[26])
        idxs_52, vecs_52 = self._filter(output_52, thresh)
        boxes_52 = self._parse(idxs_52, vecs_52, 8, anchors[52])
        return torch.cat([boxes_13, boxes_26, boxes_52], dim=0)

    def _filter(self, output, thresh):
        """Select the predictions whose confidence exceeds ``thresh``.

        Returns:
            ``(idxs, vecs)``: ``idxs`` holds one ``[n, h, w, anchor]`` row
            per selected prediction, ``vecs`` the matching raw vectors.
        """
        # [N, C, H, W] -> [N, H, W, 3, C // 3]
        output = output.permute(0, 2, 3, 1)
        output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)

        # Sigmoid squashes the confidence logit into (0, 1).
        mask = torch.sigmoid(output[..., 0]) > thresh
        idxs = mask.nonzero()
        vecs = output[mask]
        return idxs, vecs

    def _parse(self, idxs, vecs, t, anchors):
        """Decode the selected predictions into image-space boxes.

        Args:
            idxs: ``[K, 4]`` indices from ``_filter``.
            vecs: ``[K, C]`` raw prediction vectors from ``_filter``.
            t: Stride of the feature map (input pixels per grid cell).
            anchors: The three ``[w, h]`` anchors of this scale.

        Returns:
            ``[K, 6]`` tensor ``[confidence, x1, y1, x2, y2, class]`` on the
            same device as ``vecs``.
        """
        if len(idxs) == 0:
            # Nothing passed the threshold: an empty result of the right width.
            return vecs.new_zeros((0, 6))

        anchors = torch.tensor(anchors, dtype=torch.float32, device=vecs.device)
        a = idxs[:, 3]                                             # anchor index

        confidence = torch.sigmoid(vecs[:, 0])
        classify = torch.argmax(vecs[:, 5:], dim=1).float()

        # Cell index plus the offset inside the cell, scaled by the stride.
        cy = (idxs[:, 1].float() + torch.sigmoid(vecs[:, 2])) * t   # rows <-> y
        cx = (idxs[:, 2].float() + torch.sigmoid(vecs[:, 1])) * t   # cols <-> x
        # exp() undoes the logarithm applied when the labels were built.
        w = anchors[a, 0] * torch.exp(vecs[:, 3])
        h = anchors[a, 1] * torch.exp(vecs[:, 4])
        x1 = cx - w / 2
        y1 = cy - h / 2
        x2 = x1 + w
        y2 = y1 + h
        return torch.stack([confidence, x1, y1, x2, y2, classify], dim=1)


if __name__ == '__main__':
    font_path = r"C:\Windows\Fonts\simhei.ttf"          # font used for the labels
    try:
        font1 = ImageFont.truetype(font_path, 21, encoding="utf-8")
    except OSError:
        # The font file is Windows-specific; fall back to PIL's built-in font.
        font1 = ImageFont.load_default()
    dic = {"0": "person", "1": "tiger", "2": "car", "3": "dog"}
    save_path = r"\net_yolo.pth"                        # path of the saved weights
    detector = Detector(save_path)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    imgfile_path = r''                                  # directory of the images to detect

    for file in os.listdir(imgfile_path):
        with pimg.open(os.path.join(imgfile_path, file)) as img1:
            im1 = img1.convert('RGB')
            side = max(im1.size)

            # Pad to a square with black pixels (image in the top-left
            # corner), then resize to the 416x416 network input.
            img_zero = pimg.new('RGB', (side, side))
            img_zero.paste(im1, (0, 0))
            # LANCZOS is the filter formerly called ANTIALIAS, a name that
            # Pillow 10 removed.
            img = img_zero.resize((416, 416), pimg.LANCZOS)

            img = torch.Tensor(np.array(img) / 255)
            img = img.unsqueeze(0).permute(0, 3, 1, 2).to(device)
            # 0.3 is a fairly low threshold; tune it to the trained model.
            out_value = detector(img, 0.3, cfg.ANCHORS_GROUP)

            # Draw on the RGB copy so that colour tuples are always valid.
            img_draw = draw.Draw(im1)
            scale = side / 416          # 416 canvas -> padded original
            for j in range(len(dic)):   # one NMS pass per known class
                class_boxes = out_value[out_value[..., -1] == j]
                for box in tool.nms(class_boxes.cpu()):
                    c, x1, y1, x2, y2, cls = box.tolist()
                    label = dic[str(int(cls))]
                    print(c, x1, y1, x2, y2, label)

                    # Predictions may fall outside the 416 canvas: clamp,
                    # scale back, then clamp to the real (unpadded) image.
                    xx1 = min(min(max(x1, 0), 416) * scale, im1.size[0])
                    yy1 = min(min(max(y1, 0), 416) * scale, im1.size[1])
                    xx2 = min(min(max(x2, 0), 416) * scale, im1.size[0])
                    yy2 = min(min(max(y2, 0), 416) * scale, im1.size[1])

                    img_draw.rectangle((xx1, yy1, xx2, yy2), outline=(255, 0, 0), width=4)
                    img_draw.text((xx1 + 15, yy1), label, (255, 215, 0), font=font1)   # gold text
            im1.show()

检测效果如下:

yolov3项目实战——基于PyTorch实现的目标检测项目实战(附代码)_第1张图片

 

 yolov3项目实战——基于PyTorch实现的目标检测项目实战(附代码)_第2张图片

 

你可能感兴趣的:(深度学习,卷积神经网络,cv,pytorch)