哈哈哈哈哈,终于跑起来了,GitHub上找的代码,改了很多bug,超级开心,分享给大家!
实测可以在Colab上运行,需要GPU
注意
配置信息是我百度的,就是解决数据集路径问题。
虽然可以运行,但需要相应的数据集文件,有需要可以私聊,我是搬运工。
不了解YoLo的可以点下面这个链接
YoLo
这次的任务说实话自己造轮子的话很难完成,所以这次借鉴了GitHub,
之前也没用过,白嫖还是很爽的。
感谢布置任务的学长,最起码让我懂了深度学习训练模型究竟是个什么事情。
目前仅仅是跑起来就费了好大劲,给自己一天时间读读代码,
最终到能够自己复现出来的地步。
新年事情很多,要走很多亲戚,但实际上给的时间我觉得足够了…
自己也需要加速成长了!加油!
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
!google-drive-ocamlfuse -headless -id={
creds.client_id} -secret={
creds.client_secret} < /dev/null 2>&1 | grep URL
vcode = getpass.getpass()
!echo {
vcode} | google-drive-ocamlfuse -headless -id={
creds.client_id} -secret={
creds.client_secret}
from google.colab import drive
drive.mount('/content/drive/')
import torch.nn as nn
class Convention(nn.Module):
    """Conv2d -> BatchNorm2d -> LeakyReLU building block.

    Args:
        in_channels: Number of input channels of the convolution.
        out_channels: Number of output channels of the convolution.
        conv_size: Convolution kernel size.
        conv_stride: Convolution stride.
        padding: Zero padding applied by the convolution.
    """

    def __init__(self, in_channels, out_channels, conv_size, conv_stride, padding):
        super().__init__()
        # The attribute name and the layer order determine the state_dict keys
        # ("Conv.0.weight", "Conv.1.weight", ...); keep them stable so saved
        # checkpoints continue to load.
        self.Conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, conv_size, conv_stride, padding),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(),
        )

    def forward(self, x):
        """Apply convolution, batch normalisation and LeakyReLU to ``x``."""
        return self.Conv(x)
class YOLO_V1(nn.Module):
    """YOLO v1 style detection network.

    The convolutional trunk reduces the spatial size by a factor of 64, and
    ``forward`` flattens a 7 x 7 x 1024 feature map, so inputs are expected to
    be 448 x 448 images.

    Args:
        B: Number of boxes predicted per grid cell (5 values each).
        Classes_Num: Number of object classes.
    """

    def __init__(self, B=2, Classes_Num=20):
        super().__init__()
        self.B = B
        self.Classes_Num = Classes_Num
        # NOTE: attribute names below are part of the checkpoint format
        # (state_dict keys) and must not be renamed.
        self.Conv_448 = nn.Sequential(
            Convention(3, 64, 7, 2, 3),
            nn.MaxPool2d(2, 2),
        )
        self.Conv_112 = nn.Sequential(
            Convention(64, 192, 3, 1, 1),
            nn.MaxPool2d(2, 2),
        )
        self.Conv_56 = nn.Sequential(
            Convention(192, 128, 1, 1, 0),
            Convention(128, 256, 3, 1, 1),
            Convention(256, 256, 1, 1, 0),
            Convention(256, 512, 3, 1, 1),
            nn.MaxPool2d(2, 2),
        )
        self.Conv_28 = nn.Sequential(
            Convention(512, 256, 1, 1, 0),
            Convention(256, 512, 3, 1, 1),
            Convention(512, 256, 1, 1, 0),
            Convention(256, 512, 3, 1, 1),
            Convention(512, 256, 1, 1, 0),
            Convention(256, 512, 3, 1, 1),
            Convention(512, 256, 1, 1, 0),
            Convention(256, 512, 3, 1, 1),
            Convention(512, 512, 1, 1, 0),
            Convention(512, 1024, 3, 1, 1),
            nn.MaxPool2d(2, 2),
        )
        self.Conv_14 = nn.Sequential(
            Convention(1024, 512, 1, 1, 0),
            Convention(512, 1024, 3, 1, 1),
            Convention(1024, 512, 1, 1, 0),
            Convention(512, 1024, 3, 1, 1),
            Convention(1024, 1024, 3, 1, 1),
            Convention(1024, 1024, 3, 2, 1),
        )
        self.Conv_7 = nn.Sequential(
            Convention(1024, 1024, 3, 1, 1),
            Convention(1024, 1024, 3, 1, 1),
        )
        self.Fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(7 * 7 * 1024, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 7 * 7 * (B * 5 + Classes_Num)),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return predictions reshaped to (batch, 7, 7, B * 5 + Classes_Num)."""
        x = self.Conv_448(x)
        x = self.Conv_112(x)
        x = self.Conv_56(x)
        x = self.Conv_28(x)
        x = self.Conv_14(x)
        x = self.Conv_7(x)
        # batch * channel * height * width -> batch * height * width * channel
        x = x.permute(0, 2, 3, 1).contiguous()
        x = x.view(-1, 7 * 7 * 1024)
        x = self.Fc(x)
        x = x.view((-1, 7, 7, (self.B * 5 + self.Classes_Num)))
        return x

    def initialize_weights(self):
        """Initialise conv, batch-norm and linear layers in place.

        Conv2d weights use Xavier-normal, Linear weights use N(0, 0.01),
        BatchNorm2d weights are set to 1, and all biases are zeroed. Only
        ``nn.init`` is used so the method does not depend on a module-level
        ``torch`` import.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def iou(self, box1, box2):
        """Return the IoU of two boxes given as (left, top, right, bottom).

        Returns 0 when the boxes do not overlap or when their union has no
        area (degenerate boxes).
        """
        # 1. Corners of the intersection rectangle.
        interLX = max(box1[0], box2[0])
        interLY = max(box1[1], box2[1])
        interRX = min(box1[2], box2[2])
        interRY = min(box1[3], box2[3])
        # 2. No overlap.
        if interRX < interLX or interRY < interLY:
            return 0
        # 3. Individual areas and intersection.
        Area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
        Area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
        interSection = (interRX - interLX) * (interRY - interLY)
        union = Area1 + Area2 - interSection
        # 4. Guard against zero-area boxes before dividing.
        if union <= 0:
            return 0
        return interSection / union
import numpy as np
def NMS(bounding_boxes, confidence_threshold, iou_threshold):
    """Greedy, class-agnostic non-maximum suppression over grid-cell rows.

    Each row is laid out as two 5-value boxes followed by the class scores:
    ``[g0, g1, g2, g3, c, g0, g1, g2, g3, c, p_0, ..., p_k]``.

    NOTE(review): the four geometry values are compared with a corner-format
    IoU (left, top, right, bottom), so rows are assumed to be already
    converted to that form - confirm against the caller.

    Args:
        bounding_boxes: Iterable of rows in the layout above.
        confidence_threshold: A cell is dropped when both of its box
            confidences are below this value.
        iou_threshold: A candidate is suppressed when its IoU with an already
            selected box is greater than this value.

    Returns:
        List of ``[g0, g1, g2, g3, class_index, class_probability]`` ordered
        by descending class probability.
    """

    def _corner_iou(a, b):
        # IoU of two (left, top, right, bottom) boxes; 0 when disjoint.
        left, top = max(a[0], b[0]), max(a[1], b[1])
        right, bottom = min(a[2], b[2]), min(a[3], b[3])
        if right <= left or bottom <= top:
            return 0
        intersection = (right - left) * (bottom - top)
        union = ((a[2] - a[0]) * (a[3] - a[1])
                 + (b[2] - b[0]) * (b[3] - b[1]) - intersection)
        return intersection / union if union > 0 else 0

    # 1. Per cell, keep the more confident of the two predicted boxes.
    candidates = []
    for boxRow in bounding_boxes:
        row = list(boxRow)
        if row[4] < confidence_threshold and row[9] < confidence_threshold:
            continue
        class_scores = row[10:]
        class_index = int(np.argmax(class_scores))
        class_probability = class_scores[class_index]
        geometry = row[0:4] if row[4] > row[9] else row[5:9]
        candidates.append(list(geometry) + [class_index, class_probability])

    # 2. Repeatedly take the best-scoring box and drop everything that
    #    overlaps it too much.
    candidates.sort(key=lambda box: box[5], reverse=True)
    predicted_boxes = []
    while candidates:
        chosen = candidates.pop(0)
        predicted_boxes.append(chosen)
        candidates = [box for box in candidates
                      if _corner_iou(box, chosen) <= iou_threshold]
    return predicted_boxes
import torch.nn as nn
import math
import torch
class Yolov1_Loss(nn.Module):
    """YOLO v1 style loss over an S x S grid with two boxes per cell.

    The prediction layout per cell is
    ``[x, y, w, h, c, x, y, w, h, c, class scores...]``; the box indices used
    in ``forward`` (4, 9, 5:, 10:) assume exactly two boxes.

    Args:
        S: Grid size (cells per side).
        B: Number of boxes per cell; used to locate the class scores.
        Classes: Number of classes; scales the class MSE back to a sum.
        l_coord: Weight of the localisation term for cells with an object.
        l_noobj: Weight of the confidence term for boxes without an object.
    """

    def __init__(self, S=7, B=2, Classes=20, l_coord=5, l_noobj=0.5):
        super().__init__()
        self.S = S
        self.B = B
        self.Classes = Classes
        self.l_coord = l_coord
        self.l_noobj = l_noobj

    def iou(self, bounding_box, ground_box, gridX, gridY, img_size=448, grid_size=64):
        """Return the IoU (a Python number in [0, 1]) of a prediction and a label.

        Args:
            bounding_box: Predicted ``[cx_offset, cy_offset, w, h, ...]`` where
                offsets are relative to the cell and sizes to the image.
            ground_box: Label row whose entries 5..8 are
                ``xmin, ymin, xmax, ymax`` in pixels.
            gridX: Pixel x of the cell's left edge.
            gridY: Pixel y of the cell's top edge.
            img_size: Image side length in pixels.
            grid_size: Cell side length in pixels.
        """
        # Convert the prediction to integer pixel centre/size first.
        center_x = int(gridX + bounding_box[0] * grid_size)
        center_y = int(gridY + bounding_box[1] * grid_size)
        width = int(bounding_box[2] * img_size)
        height = int(bounding_box[3] * img_size)
        # Clip the predicted corners to the image.
        pred_xmin = max(0, center_x - width / 2)
        pred_ymin = max(0, center_y - height / 2)
        pred_xmax = min(img_size - 1, center_x + width / 2)
        pred_ymax = min(img_size - 1, center_y + height / 2)
        predict_area = (pred_xmax - pred_xmin) * (pred_ymax - pred_ymin)

        gt_xmin, gt_ymin, gt_xmax, gt_ymax = (float(ground_box[k]) for k in range(5, 9))
        ground_area = (gt_xmax - gt_xmin) * (gt_ymax - gt_ymin)

        # Intersection uses the same (max - min) convention as both areas, so
        # the ratio cannot exceed 1.
        inter_w = min(pred_xmax, gt_xmax) - max(pred_xmin, gt_xmin)
        inter_h = min(pred_ymax, gt_ymax) - max(pred_ymin, gt_ymin)
        if inter_w <= 0 or inter_h <= 0:
            return 0
        intersection = inter_w * inter_h
        union = predict_area + ground_area - intersection
        if union <= 0:
            return 0
        return intersection / union

    def forward(self, bounding_boxes, ground_truth, batch_size=32, grid_size=64, img_size=448):
        """Compute the loss for a batch.

        Args:
            bounding_boxes: Predictions shaped (batch, S, S, B * 5 + Classes).
            ground_truth: Labels indexed as ``[batch][i][j][0]``; entry 9 is the
                box area and is 0 for padding rows (no object in the cell),
                entries 10.. are the one-hot class vector.
            batch_size: Unused; kept for interface compatibility.
            grid_size: Cell side length in pixels.
            img_size: Image side length in pixels.

        Returns:
            ``(loss, loss_coord, loss_confidence, loss_classes, iou_sum,
            object_num)``. Only ``loss`` carries gradients; the others are
            plain numbers for logging.
        """
        loss = 0
        loss_coord = 0
        loss_confidence = 0
        loss_classes = 0
        iou_sum = 0
        object_num = 0
        mse_loss = nn.MSELoss()
        class_start = self.B * 5
        for batch in range(len(bounding_boxes)):
            for i in range(self.S):  # rows -> Y
                for j in range(self.S):  # columns -> X
                    cell = bounding_boxes[batch][i][j]
                    # Keep the more confident box; the other one is always
                    # treated as a negative sample.
                    if cell[4] < cell[9]:
                        predict_box = cell[5:]
                        rejected_confidence = cell[4]
                    else:
                        predict_box = torch.cat((cell[0:5], cell[10:]), dim=0)
                        rejected_confidence = cell[9]
                    noobj_term = self.l_noobj * torch.pow(rejected_confidence, 2)
                    loss = loss + noobj_term
                    loss_confidence += noobj_term.item()

                    ground_box = ground_truth[batch][i][j][0]
                    if ground_box[9] == 0:
                        # Zero-area padding row: the kept box is negative too.
                        noobj_term = self.l_noobj * torch.pow(predict_box[4], 2)
                        loss = loss + noobj_term
                        loss_confidence += noobj_term.item()
                        continue

                    object_num = object_num + 1
                    iou_sum = iou_sum + self.iou(
                        predict_box, ground_box, j * grid_size, i * grid_size,
                        img_size=img_size, grid_size=grid_size)

                    # Localisation: squared error on the centre, and on the
                    # square roots of width/height.
                    coord_term = self.l_coord * (
                        torch.pow(ground_box[0] - predict_box[0], 2)
                        + torch.pow(ground_box[1] - predict_box[1], 2)
                        + torch.pow(torch.sqrt(ground_box[2] + 1e-8) - torch.sqrt(predict_box[2] + 1e-8), 2)
                        + torch.pow(torch.sqrt(ground_box[3] + 1e-8) - torch.sqrt(predict_box[3] + 1e-8), 2))
                    loss = loss + coord_term
                    loss_coord += coord_term.item()

                    confidence_term = torch.pow(ground_box[4] - predict_box[4], 2)
                    loss = loss + confidence_term
                    loss_confidence += confidence_term.item()

                    # MSELoss averages over classes; multiply back to a sum.
                    class_term = mse_loss(ground_box[10:], cell[class_start:]) * self.Classes
                    loss = loss + class_term
                    loss_classes += class_term.item()
        print("坐标误差:{} 置信度误差:{} 类别损失:{} iou_sum:{} object_num:{} iou:{}".format(loss_coord, loss_confidence, loss_classes, iou_sum, object_num, "nan" if object_num == 0 else (iou_sum / object_num)))
        return loss, loss_coord, loss_confidence, loss_classes, iou_sum, object_num
from torch.utils.data import Dataset
import os
import cv2
import xml.etree.ElementTree as ET
import torch
import torchvision.transforms as transforms
class YoloV1DataSet(Dataset):
    """Dataset of images plus per-grid-cell ground truth parsed from XML annotations.

    Images and annotation files are paired by their position after sorting
    the two directory listings, so both directories must contain matching
    file sets.

    Args:
        imgs_dir: Directory containing the images.
        annotations_dir: Directory containing one XML annotation per image.
        img_size: Side length images and boxes are rescaled to.
        S: Number of grid cells per side.
        B: Number of boxes predicted per grid cell (stored only).
        ClassesFile: Text file with one class name per line.
    """

    def __init__(self, imgs_dir="/content/drive/My Drive/VOC2007/Train/JPEGImages", annotations_dir="/content/drive/My Drive/VOC2007/Train/Annotations", img_size=448, S=7, B=2, ClassesFile="/content/drive/My Drive/VOC2007/Train/class.data"):
        self.transfrom = transforms.Compose([
            transforms.ToTensor(),  # height * width * channel -> channel * height * width
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
        ])
        # Sorting both listings lets images and annotations share an index.
        self.img_path = [os.path.join(imgs_dir, name) for name in sorted(os.listdir(imgs_dir))]
        self.annotation_path = [os.path.join(annotations_dir, name) for name in sorted(os.listdir(annotations_dir))]
        self.img_size = img_size
        self.S = S
        self.B = B
        self.grid_cell_size = self.img_size / self.S
        # Class name -> index, in file order. Blank lines are ignored so a
        # trailing newline does not create an extra, empty class.
        self.ClassNameToInt = {}
        classIndex = 0
        with open(ClassesFile, 'r') as f:
            for line in f:
                class_name = line.strip()
                if not class_name:
                    continue
                self.ClassNameToInt[class_name] = classIndex
                classIndex = classIndex + 1
        print(self.ClassNameToInt)
        self.Classes = classIndex  # total number of classes
        self.getGroundTruth()

    def getGroundTruth(self):
        """Parse every annotation file into ``self.ground_truth``.

        The result is a float tensor shaped
        ``(num_images, S, S, 1, 10 + Classes)``. Each row is
        ``[cx_offset, cy_offset, w / img_size, h / img_size, 1, xmin, ymin,
        xmax, ymax, area, one-hot class...]``; cells without an object hold an
        all-zero row so every cell has the same shape (a tensor cannot be
        built from ragged lists). When several objects fall into one cell,
        only the one with the largest area is kept.

        Raises:
            ValueError: If the number of annotations and images differ.
        """
        if len(self.annotation_path) != len(self.img_path):
            raise ValueError(
                "found {} annotation files for {} images; they must match one-to-one".format(
                    len(self.annotation_path), len(self.img_path)))
        row_length = 10 + self.Classes
        all_truth = []
        for annotation_file in self.annotation_path:
            cells = [[[] for _ in range(self.S)] for _ in range(self.S)]
            annotation_xml = ET.parse(annotation_file).getroot()
            size_xml = annotation_xml.find("size")
            # Scale factors from the original image size to img_size.
            scaleX = self.img_size / int(size_xml.find("width").text)
            scaleY = self.img_size / int(size_xml.find("height").text)
            for object_xml in annotation_xml.findall("object"):
                class_name = object_xml.find("name").text
                if class_name not in self.ClassNameToInt:  # not one of our classes
                    continue
                bnd_xml = object_xml.find("bndbox")
                xmin = int(int(bnd_xml.find("xmin").text) * scaleX)
                ymin = int(int(bnd_xml.find("ymin").text) * scaleY)
                xmax = int(int(bnd_xml.find("xmax").text) * scaleX)
                ymax = int(int(bnd_xml.find("ymax").text) * scaleY)
                centerX = (xmin + xmax) / 2
                centerY = (ymin + ymax) / 2
                # The object belongs to the cell in row indexI, column indexJ.
                # Clamp so a centre on the far image edge stays in the grid.
                indexI = min(self.S - 1, int(centerY / self.grid_cell_size))
                indexJ = min(self.S - 1, int(centerX / self.grid_cell_size))
                ClassList = [0] * self.Classes
                ClassList[self.ClassNameToInt[class_name]] = 1
                ground_box = [centerX / self.grid_cell_size - indexJ, centerY / self.grid_cell_size - indexI, (xmax - xmin) / self.img_size, (ymax - ymin) / self.img_size, 1, xmin, ymin, xmax, ymax, (xmax - xmin) * (ymax - ymin)]
                ground_box.extend(ClassList)
                cells[indexI][indexJ].append(ground_box)
            image_truth = []
            for cell_row in cells:
                truth_row = []
                for boxes in cell_row:
                    if boxes:
                        truth_row.append([max(boxes, key=lambda box: box[9])])
                    else:
                        truth_row.append([[0] * row_length])
                image_truth.append(truth_row)
            all_truth.append(image_truth)
        self.ground_truth = torch.Tensor(all_truth).float()

    def __getitem__(self, item):
        """Return ``(image_tensor, ground_truth)`` for index ``item``.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        # cv2 returns height * width * channel, or None when reading fails.
        img_data = cv2.imread(self.img_path[item])
        if img_data is None:
            raise FileNotFoundError("cannot read image: {}".format(self.img_path[item]))
        img_data = cv2.resize(img_data, (self.img_size, self.img_size), interpolation=cv2.INTER_AREA)
        img_data = self.transfrom(img_data)
        return img_data, self.ground_truth[item]

    def __len__(self):
        return len(self.img_path)
#---------------step1:Dataset-------------------
import torch
dataSet = YoloV1DataSet()
from torch.utils.data import DataLoader
dataLoader = DataLoader(dataSet, batch_size=32, shuffle=True, num_workers=4)
#---------------step2:Model-------------------
Yolo = YOLO_V1().cuda(device=0)
Yolo.initialize_weights()
#---------------step3:LossFunction-------------------
# Keep the loss on the same device as the model and the batches (device 0).
loss_function = Yolov1_Loss().cuda(device=0)
#---------------step4:Optimizer-------------------
import torch.optim as optim
optimizer = optim.SGD(Yolo.parameters(), lr=5e-3, momentum=0.9, weight_decay=0.0005)
# Multiply the learning rate by 0.9 at each listed epoch.
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[200, 400, 600, 20000, 30000], gamma=0.9)
#--------------step5:Tensorboard Feature Map------------
from tensorboardX import SummaryWriter
import torchvision.utils as vutils
import torch.nn as nn
writer = SummaryWriter('log')
def feature_map_visualize(img_data, writer):
    """Log the feature maps of the global ``Yolo`` network to TensorBoard.

    The image is pushed through every convolution, normalisation, activation
    and pooling module in ``Yolo.modules()`` order; after each one, all
    channels are tiled into a single grid image and written under the
    ``'feature_map'`` tag with the module index as the step.

    Args:
        img_data: A single image tensor of shape (channel, height, width).
        writer: A SummaryWriter-like object providing ``add_image``.
    """
    layer_types = (nn.Conv2d, nn.BatchNorm2d, nn.ReLU, nn.LeakyReLU,
                   nn.MaxPool2d, nn.AdaptiveAvgPool2d)
    # Run on whatever device the model lives on.
    device = next(Yolo.parameters()).device
    img_data = img_data.unsqueeze(0).to(device)
    was_training = Yolo.training
    # Evaluation mode keeps BatchNorm running statistics untouched.
    Yolo.eval()
    try:
        with torch.no_grad():
            for i, m in enumerate(Yolo.modules()):
                if not isinstance(m, layer_types):
                    continue
                img_data = m(img_data)
                # (1, C, H, W) -> (C, 1, H, W): one grid tile per channel.
                channels_as_batch = img_data.permute(1, 0, 2, 3)
                img_grid = vutils.make_grid(channels_as_batch, normalize=True, scale_each=True)
                writer.add_image('feature_map', img_grid, global_step=i)
    finally:
        Yolo.train(was_training)
#---------------step6:Train-------------------
epoch = 1
while epoch <= 2000 * dataSet.Classes:
    # Per-epoch running totals (plain numbers, used for logging only).
    loss_sum = 0
    loss_coord = 0
    loss_confidence = 0
    loss_classes = 0
    epoch_iou = 0
    epoch_object_num = 0
    for batch_index, batch_train in enumerate(dataLoader):
        optimizer.zero_grad()
        train_data = batch_train[0].float().cuda(device=0)
        label_data = batch_train[1].float().cuda(device=0)
        # The loss returns (total, coord, confidence, classes, iou_sum,
        # object_num); only the first element is a tensor with gradients.
        (batch_loss, batch_coord, batch_confidence, batch_classes,
         batch_iou, batch_object_num) = loss_function(bounding_boxes=Yolo(train_data), ground_truth=label_data)
        batch_loss.backward()
        optimizer.step()
        loss_sum += batch_loss.item()
        loss_coord += batch_coord
        loss_confidence += batch_confidence
        loss_classes += batch_classes
        epoch_iou += float(batch_iou)
        epoch_object_num += batch_object_num
        #writer.add_graph(loss_function, (Yolo(train_data),label_data))
        print("batch_index : {} ; batch_loss : {}".format(batch_index, batch_loss.item()))
    # Advance the learning-rate schedule once per epoch, after the optimizer
    # steps of that epoch.
    scheduler.step()
    if (epoch < 1000 and epoch % 100 == 0) or epoch % 1000 == 0:
        torch.save(Yolo.state_dict(), './YOLO_V1_' + str(epoch) + '.pth')
    print("epoch : {} ; loss : {}".format(epoch, loss_sum))
    for name, layer in Yolo.named_parameters():
        # Parameters that received no gradient have grad == None.
        if layer.grad is not None:
            writer.add_histogram(name + '_grad', layer.grad.cpu().data.numpy(), epoch)
        writer.add_histogram(name + '_data', layer.cpu().data.numpy(), epoch)
    #feature_map_visualize(batch_train[0], writer)
    writer.add_scalar('Train/Loss_sum', loss_sum, epoch)
    writer.add_scalar('Train/Loss_coord', loss_coord, epoch)
    writer.add_scalar('Train/Loss_confidenct', loss_confidence, epoch)
    writer.add_scalar('Train/Loss_classes', loss_classes, epoch)
    # Mean IoU is undefined when the epoch contained no labelled objects.
    if epoch_object_num > 0:
        writer.add_scalar('Train/Epoch_iou', epoch_iou / epoch_object_num, epoch)
    epoch += 1
writer.close()
# Load the trained network
Yolo_V1 = YOLO_V1()
import torch
Yolo_V1.load_state_dict(torch.load('/content/drive/My Drive/YOLO_V1_41.pth'))
Yolo_V1 = Yolo_V1.cuda()
# Inference mode: disables Dropout and makes BatchNorm use its running statistics.
Yolo_V1.eval()
# Class index -> class name, in the order the names appear in the class file.
IndexToClassName = {}
with open("/content/drive/My Drive/VOC2007/Train/class.data", "r") as f:
    index = 0
    for line in f:
        # Drop the trailing newline so labels render cleanly; skip blank lines.
        class_name = line.strip()
        if not class_name:
            continue
        IndexToClassName[index] = class_name
        index = index + 1
def iou(box_one, box_two):
    """Return the intersection-over-union of two boxes.

    Both boxes are given as ``(left, top, right, bottom, ...)``; only the
    first four entries are read. Returns 0 when the boxes do not overlap
    (touching edges count as no overlap).
    """
    LX = max(box_one[0], box_two[0])
    LY = max(box_one[1], box_two[1])
    RX = min(box_one[2], box_two[2])
    RY = min(box_one[3], box_two[3])
    if LX >= RX or LY >= RY:
        return 0
    intersection = (RX - LX) * (RY - LY)
    area_one = (box_one[2] - box_one[0]) * (box_one[3] - box_one[1])
    area_two = (box_two[2] - box_two[0]) * (box_two[3] - box_two[1])
    # Union = sum of areas minus the part counted twice. It is strictly
    # positive here because the intersection is.
    return intersection / (area_one + area_two - intersection)
import numpy as np
def NMS(bounding_boxes, S=7, B=2, img_size=448, confidence_threshold=0.5, iou_threshold=0.7):
    """Decode network output into pixel boxes and apply non-maximum suppression.

    Args:
        bounding_boxes: Tensor shaped (batch, S, S, B * 5 + classes). Each cell
            holds B boxes ``[cx_offset, cy_offset, w, h, confidence]`` followed
            by the class scores; offsets are relative to the cell, sizes to
            the image.
        S: Number of grid cells per side.
        B: Number of boxes per cell.
        img_size: Image side length in pixels.
        confidence_threshold: Cells whose best box confidence is below this
            value are discarded.
        iou_threshold: A box is suppressed when its IoU with an already
            selected box of the same image is greater than this value.

    Returns:
        A flat list (images in batch order) of boxes
        ``[xmin, ymin, xmax, ymax, score, class_index, ...]`` where ``score``
        is confidence * best class probability. Entries after index 5 are the
        leftover class probabilities of the row.
    """

    def _corner_iou(a, b):
        # IoU of two (xmin, ymin, xmax, ymax, ...) boxes; 0 when disjoint.
        left, top = max(a[0], b[0]), max(a[1], b[1])
        right, bottom = min(a[2], b[2]), min(a[3], b[3])
        if left >= right or top >= bottom:
            return 0
        intersection = (right - left) * (bottom - top)
        union = ((a[2] - a[0]) * (a[3] - a[1])
                 + (b[2] - b[0]) * (b[3] - b[1]) - intersection)
        return intersection / union

    bounding_boxes = bounding_boxes.cpu().detach().numpy().tolist()
    grid_size = img_size / S
    class_start = B * 5
    nms_boxes = []
    for image_cells in bounding_boxes:
        # 1. Decode the most confident box of every cell to pixel corners.
        predict_boxes = []
        for i in range(S):  # row -> Y
            for j in range(S):  # column -> X
                cell = image_cells[i][j]
                gridX = grid_size * j
                gridY = grid_size * i
                best = max(range(B), key=lambda b: cell[5 * b + 4])
                bounding_box = cell[5 * best:5 * best + 5]
                if bounding_box[4] < confidence_threshold:
                    continue
                bounding_box = bounding_box + cell[class_start:]
                centerX = int(gridX + bounding_box[0] * grid_size)
                centerY = int(gridY + bounding_box[1] * grid_size)
                # Width and height are fractions of the whole image.
                width = int(bounding_box[2] * img_size)
                height = int(bounding_box[3] * img_size)
                bounding_box[0] = max(0, int(centerX - width / 2))
                bounding_box[1] = max(0, int(centerY - height / 2))
                bounding_box[2] = min(img_size - 1, int(centerX + width / 2))
                bounding_box[3] = min(img_size - 1, int(centerY + height / 2))
                predict_boxes.append(bounding_box)

        # 2. Greedy suppression, most confident box first.
        predict_boxes.sort(key=lambda box: box[4], reverse=True)
        while predict_boxes:
            assured_box = predict_boxes.pop(0)
            classIndex = int(np.argmax(assured_box[5:]))
            # Final score = object confidence * class probability.
            assured_box[4] = assured_box[4] * assured_box[5 + classIndex]
            assured_box[5] = classIndex
            nms_boxes.append(assured_box)
            predict_boxes = [box for box in predict_boxes
                             if _corner_iou(assured_box, box) <= iou_threshold]
    return nms_boxes
# Read the test image
import cv2
import torch
test_dir = "/content/drive/My Drive/VOC2007/Train/JPEGImages/000005.jpg"
img_data = cv2.imread(test_dir)
if img_data is None:
    raise FileNotFoundError("cannot read image: {}".format(test_dir))
img_data = cv2.resize(img_data, (448, 448), interpolation=cv2.INTER_AREA)
# Same preprocessing as the training dataset (ToTensor + Normalize(0.5, 0.5)):
# HWC values in 0..255 -> NCHW floats in [-1, 1].
train_data = torch.from_numpy(img_data).float().div(255.0)
train_data = train_data.permute(2, 0, 1).unsqueeze(0)
train_data = ((train_data - 0.5) / 0.5).cuda()
# Inference only: no Dropout, BatchNorm running statistics, no autograd graph.
Yolo_V1.eval()
with torch.no_grad():
    bounding_boxes = Yolo_V1(train_data)
NMS_boxes = NMS(bounding_boxes)
for box in NMS_boxes:
    print(box)
    img_data = cv2.rectangle(img_data, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 1)
    img_data = cv2.putText(img_data, "class:{} confidence:{}".format(IndexToClassName[box[5]], box[4]), (box[0], box[1]), cv2.FONT_HERSHEY_PLAIN, 0.5, (0, 0, 255), 1)
# cv2.imshow is not usable inside Colab; use the Colab helper when available
# and fall back to a local window otherwise.
try:
    from google.colab.patches import cv2_imshow
except ImportError:
    cv2.imshow("local", img_data)
    cv2.waitKey()
else:
    cv2_imshow(img_data)