Honestly, reading the network through a config file is really not a good way to study the code, so I rewrote it by hand, following the network diagram in https://blog.csdn.net/u012863603/article/details/126118799 .
# coding=utf-8
import math
import torch
import torch.nn as nn
'''
Author:Don
date:2022/10/17 15:51
desc:
'''
from baseblock import *
from utils import *
class Model(nn.Module):
def __init__(self,classes=2,anchors=None,ch=3):
super(Model, self).__init__()
# self.train=False
na=3*(classes+5)
self.layer0=Conv(ch,32,3,1) # 0
self.layer1=Conv(32,64,3,2) # 1-p1/2
self.layer2=Conv(64,64,3,1)
self.layer3=Conv(64,128,3,2) #3-p2/4
self.layer11=ELAN(128)
self.layer16=MP_1(256)
self.layer24=ELAN(256)
self.layer29=MP_1(512)
self.layer37=ELAN(512)
self.layer42=MP_1(1024)
self.layer50=ELAN_E(1024)
self.layer51=SPPCSPC(1024,512)
self.layer52=Conv(512,256,1,1)
self.upsample=nn.Upsample(None,2,'nearest')
self.concat = Concat(1)
self.layer54=Conv(1024,256,1,1)
self.layer63=ELAN_W(512)
self.layer64=Conv(256,128,1,1)
self.layer66=Conv(512,128,1,1)
self.layer75=ELAN_W(256)
self.layer80=MP_2(128)
self.layer88=ELAN_W(512)
self.layer93=MP_2(256)
self.layer101=ELAN_W(1024)
self.layer102=RepConv(128,256,3,1)
self.layer103=RepConv(256,512,3,1)
self.layer104=RepConv(512,1024,3,1)
self.idetect=IDetect(classes,anchors=anchors,ch=[256,512,1024])
s = 256 # 2x min stride; run a 256x256 dummy input through the model to recover each output level's downsampling factor
self.idetect.stride=torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])
self.idetect.anchors /= self.idetect.stride.view(-1, 1, 1)
# check_anchor_order(self.idetect)
self.stride = self.idetect.stride
self._initialize_biases() # only run once
# Init weights, biases
initialize_weights(self)
def forward(self,x):
x3=self.layer3(self.layer2(self.layer1(self.layer0(x))))
x24=self.layer24(self.layer16(self.layer11(x3)))
x37=self.layer37(self.layer29(x24))
x51=self.layer51(self.layer50(self.layer42(x37)))
x63=self.layer63(self.concat([self.layer54(x37),self.upsample(self.layer52(x51))]))
x75=self.layer75(self.concat([self.layer66(x24),self.upsample(self.layer64(x63))]))
x88=self.layer88(self.concat([self.layer80(x75),x63]))
x101=self.layer101(self.concat([self.layer93(x88),x51]))
x102=self.layer102(x75)
x103=self.layer103(x88)
x104=self.layer104(x101)
out=self.idetect([x102,x103,x104])
return out
def _initialize_biases(self):
for mi,s in zip(self.idetect.m,self.idetect.stride):
b=mi.bias.view(self.idetect.na,-1) # conv.bias(255) to (3,85)
b.data[:,4]+= math.log(8/(640/s)**2)
b.data[:, 5:] += math.log(0.6 / (self.idetect.nc - 0.99)) # cls
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
# device=torch.device('cuda:0')
# anchors=[[12,16, 19,36, 40,28],[36,75, 76,55, 72,146],[142,110, 192,243, 459,401]]
#
# yolov7 = Model(classes=1,anchors=anchors).to(device)
# img = torch.rand( 1, 3, 640, 640).to(device)
# y = yolov7(img)
# print(y[0].shape)
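For reference, a small sketch of what the stride / anchor bookkeeping in __init__ works out to with the default anchors (this assumes the usual three output levels at 1/8, 1/16 and 1/32 resolution; the numbers are only illustrative):

import torch
s = 256
feature_heights = [32, 16, 8]  # heights of the three IDetect inputs for a 256x256 dummy image
stride = torch.tensor([s / h for h in feature_heights])  # tensor([ 8., 16., 32.])
anchors_p3 = torch.tensor([[12., 16.], [19., 36.], [40., 28.]])
print(anchors_p3 / stride[0])  # anchors end up stored in grid units after self.idetect.anchors /= stride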
The building blocks below were again written against the same network diagram (see the link above).
# coding=utf-8
import torch
import torch.nn as nn
'''
Author:Don
date:2022/10/17 16:24
desc:
'''
def autopad(k,p=None):
if p is None:
p=k//2 if isinstance(k,int) else [x//2 for x in k]
return p
class Conv(nn.Module):
# ch_in, ch_out, kernel, stride, padding, groups
def __init__(self,c1,c2,k=1,s=1,p=None,g=1,act=True):
super(Conv, self).__init__()
self.conv=nn.Conv2d(c1,c2,k,s,autopad(k,p),groups=g,bias=False)
self.bn=nn.BatchNorm2d(c2)
self.act=nn.SiLU() if act is True else (act if isinstance(act,nn.Module) else nn.Identity())
def forward(self,x):
return self.act(self.bn(self.conv(x)))
def fuseforward(self,x):
return self.act(self.conv(x))
class Concat(nn.Module):
def __init__(self,dimension=1):
super(Concat, self).__init__()
self.d=dimension
def forward(self,x):
return torch.cat(x,self.d)
class MP(nn.Module):
def __init__(self,k=2):
super(MP, self).__init__()
self.m=nn.MaxPool2d(kernel_size=k,stride=k)
def forward(self,x):
return self.m(x)
class MP_2(nn.Module):
def __init__(self,c1):
super(MP_2, self).__init__()
self.conv1=Conv(c1,c1,1,1)
self.conv2=Conv(c1,c1,3,2)
self.m=MP()
self.concat=Concat(1)
def forward(self,x):
y1=self.conv1(self.m(x))
y2=self.conv2(self.conv1(x))
return self.concat([y2,y1])
class MP_1(nn.Module):
def __init__(self,c1):
super(MP_1, self).__init__()
c2=c1//2
self.m=MP()
self.conv1=Conv(c1,c2,1,1)
self.conv2=Conv(c2,c2,3,2)
self.concat=Concat(1)
def forward(self,x):
y1=self.conv1(self.m(x))
y2=self.conv2(self.conv1(x))
return self.concat([y1,y2])
class ELAN(nn.Module):
def __init__(self, c1):
super(ELAN, self).__init__()
c2=c1//2
c3=c1*2
self.conv1 = Conv(c1, c2, 1, 1)
self.conv2 = Conv(c2, c2, 3, 1)
self.conv3 = Conv(c3, c3, 1, 1)
self.concat=Concat(1)
def forward(self,x):
y1=self.conv1(x)
y2=self.conv1(x)
y3=self.conv2(self.conv2(y2))
y4=self.conv2(self.conv2(y3))
return self.conv3(self.concat([y4,y3,y2,y1]))
class ELAN_E(nn.Module):
def __init__(self, c1):
super(ELAN_E, self).__init__()
c2=c1//4
c3=c1
self.conv1 = Conv(c1, c2, 1, 1)
self.conv2 = Conv(c2, c2, 3, 1)
self.conv3 = Conv(c3, c3, 1, 1)
self.concat=Concat(1)
def forward(self,x):
y1=self.conv1(x)
y2=self.conv1(x)
y3=self.conv2(self.conv2(y2))
y4=self.conv2(self.conv2(y3))
return self.conv3(self.concat([y4,y3,y2,y1]))
class ELAN_W(nn.Module):
def __init__(self, c1):
super(ELAN_W, self).__init__()
c2=c1//2
c3=c2//2
c4=2*c2+4*c3
self.conv1 = Conv(c1, c2, 1, 1)
self.conv2 = Conv(c2, c3, 3, 1)
self.conv3 = Conv(c3, c3, 3, 1)
self.conv4 = Conv(c4, c4//4, 1, 1)
self.concat=Concat(1)
def forward(self,x):
y1=self.conv1(x)
y2=self.conv1(x)
y3=self.conv2(y2)
y4=self.conv3(y3)
y5=self.conv3(y4)
y6=self.conv3(y5)
return self.conv4(self.concat([y6,y5,y4,y3,y2,y1]))
class SPPCSPC(nn.Module):
def __init__(self,c1,c2,e=0.5,k=(5,9,13)):
super(SPPCSPC, self).__init__()
c_=int(2*c2*e) #hidden channels
self.cv1=Conv(c1,c_,1,1)
self.cv2=Conv(c1,c_,1,1)
self.cv3=Conv(c_,c_,3,1)
self.cv4=Conv(c_,c_,1,1)
self.m=nn.ModuleList([nn.MaxPool2d(kernel_size=x,stride=1,padding=x//2) for x in k])
self.cv5=Conv(4*c_,c_,1,1)
self.cv6=Conv(c_,c_,3,1)
self.cv7=Conv(2*c_,c2,1,1)
def forward(self,x):
x1=self.cv4(self.cv3(self.cv1(x)))
y1=self.cv6(self.cv5(torch.cat([x1]+[m(x1) for m in self.m],1)))
y2=self.cv2(x)
return self.cv7(torch.cat((y1,y2),dim=1))
class RepConv(nn.Module):
def __init__(self,c1,c2,k=3,s=1,p=None,g=1,act=True,deploy=False):
super(RepConv, self).__init__()
self.deploy=deploy
self.groups=g
self.in_channels=c1
self.out_channels=c2
padding_11=autopad(k,p)-k//2
self.act=nn.SiLU() if act is True else (act if isinstance(act,nn.Module) else nn.Identity())
if deploy:
self.rbr_reparam=nn.Conv2d(c1,c2,k,s,autopad(k,p),groups=g,bias=True)
else:
self.rbr_dense = nn.Sequential(
nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False),
nn.BatchNorm2d(num_features=c2),
)
self.rbr_1x1 = nn.Sequential(
nn.Conv2d(c1, c2, 1, s, padding_11, groups=g, bias=False),
nn.BatchNorm2d(num_features=c2),
)
def forward(self,x):
if hasattr(self, "rbr_reparam"):
return self.act(self.rbr_reparam(x))
return self.act(self.rbr_dense(x) + self.rbr_1x1(x))
class ImplicitA(nn.Module):
def __init__(self,channel,mean=0.,std=.02):
super(ImplicitA, self).__init__()
self.channel=channel
self.mean=mean
self.std=std
self.implicit=nn.Parameter(torch.zeros(1,channel,1,1))
nn.init.normal_(self.implicit,mean=self.mean,std=std)
def forward(self,x):
return self.implicit+x
class ImplicitM(nn.Module):
def __init__(self,channel,mean=0,std=.02):
super(ImplicitM, self).__init__()
self.channel=channel
self.mean=mean
self.std=std
self.implicit=nn.Parameter(torch.ones(1,channel,1,1))
nn.init.normal_(self.implicit,mean=self.mean,std=self.std)
def forward(self,x):
return self.implicit*x
class IDetect(nn.Module):
stride = None # strides computed during build
export = False # onnx export
def __init__(self,nc=80,anchors=(),ch=()):
super(IDetect, self).__init__()
self.nc=nc
self.no=nc+5 # outputs per anchor: x, y, w, h, obj plus nc class scores
self.nl=len(anchors) #3
self.na = len(anchors[0]) // 2 # number of anchors 3
self.grid=[torch.zeros(1)]*self.nl
a = torch.tensor(anchors).float().view(self.nl, -1, 2) # 3,3,2
self.register_buffer('anchors', a) # shape(nl,na,2)
self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2)
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)
self.ia=nn.ModuleList(ImplicitA(x) for x in ch)
self.im=nn.ModuleList(ImplicitM(self.no*self.na) for _ in ch)
# self.training=True
def forward(self,x):
z=[]
self.training|=self.export
for i in range(self.nl):
x[i]=self.m[i](self.ia[i](x[i]))
x[i]=self.im[i](x[i])
bs,_,ny,nx=x[i].shape
x[i]=x[i].view(bs,self.na,self.no,ny,nx).permute(0,1,3,4,2).contiguous()
if not self.training: # inference
if self.grid[i].shape[2:4] != x[i].shape[2:4]:
self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
y = x[i].sigmoid()
y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy
y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
z.append(y.view(bs, -1, self.no))
return x if self.training else (torch.cat(z, 1), x)
@staticmethod
def _make_grid(nx=20, ny=20):
yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
# anchors=[[12,16, 19,36, 40,28],[36,75, 76,55, 72,146],[142,110, 192,243, 459,401]]
# ch=[256,512,1024]
# d=IDetect(anchors=anchors)
# implicit=nn.Parameter(torch.zeros(1,3,1,1))
# nn.init.normal_(implicit,mean=0.,std=.02)
# print(implicit)
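To sanity-check the channel bookkeeping of the blocks above, a quick shape test like the following can help (a sketch, assuming this file is saved as baseblock.py to match the from baseblock import * in the model file):

import torch
from baseblock import ELAN, MP_1, SPPCSPC

x = torch.randn(1, 128, 80, 80)
y = ELAN(128)(x)        # ELAN doubles the channels: (1, 256, 80, 80)
z = MP_1(256)(y)        # MP_1 halves the spatial size and keeps the channels: (1, 256, 40, 40)
print(y.shape, z.shape)
print(SPPCSPC(1024, 512)(torch.randn(1, 1024, 20, 20)).shape)  # (1, 512, 20, 20)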
Because the original pipeline is too complex and my dataset is very small, I removed mosaic and mixup and kept only simple rotation and flipping. The labels are read in the JSON format produced directly by labelimg annotation, so there is no need to convert them to the YOLOv5 label format first.
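For reference, this is roughly the per-image JSON that cache_labels below expects; the field names come straight from the parsing code, while the values here are made up. The .json file is found by swapping /images/ for /labels/ and the image extension for json (see img2label_paths).

# a sketch of what json.load() should return for one annotated image
label_json = {
    "imageHeight": 480,
    "imageWidth": 640,
    "shapes": [
        {"label": "sack", "points": [[100.0, 120.0], [220.0, 240.0]]},  # polygon/box points, one entry per object
    ],
}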
# coding=utf-8
import os
import random
import sys
import math   # used by random_perspective below
import torch  # used by xyxy2xywh, __getitem__ and the cache
path = os.path.dirname(__file__)
sys.path.append(path)
'''
Author:Don
date:2022/10/19 15:00
desc:
'''
from utils import *
from torch.utils.data import Dataset
from pathlib import Path
import glob
from tqdm import tqdm
from PIL import Image
import json
import numpy as np
import cv2
def img2label_paths(img_paths):
# Define label paths as a function of image paths
sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings
return ['json'.join(x.replace(sa, sb, 1).rsplit(x.split('.')[-1], 1)) for x in img_paths]
def xyxy2xywh(x):
# Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
y[:, 2] = (x[:, 2] - x[:, 0]) # width
y[:, 3] = (x[:, 3] - x[:, 1]) # height
return y
def segments2boxes(segments,img_h,img_w):
# Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh)
boxes = []
for s in segments:
x, y = s.T # segment xy
boxes.append([x.min(), y.min(), x.max(), y.max()]) # cls, xyxy
y=xyxy2xywh(np.array(boxes)) # cls, xywh
y[:,[1, 3]] /= img_h # normalized height 0-1
y[:,[0, 2]] /= img_w # normalized width 0-1
return y
def load_image(self, index):
img = self.imgs[index]
if img is None:
path=self.img_files[index]
img=cv2.imread(path)
h0,w0=img.shape[:2]
r=self.img_size/max(h0,w0)
if r!=1:
interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
return img,(h0,w0),img.shape[:2]
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), scaleup=True, stride=32):
shape = img.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
ratio=r,r
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding 640-640 640-512
dw /= 2 # divide padding into 2 sides
dh /= 2
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return img, ratio, (dw, dh)
def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw # top left x
y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh # top left y
y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw # bottom right x
y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh # bottom right y
return y
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
# Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio
return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates
def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
border=(0, 0)):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
# targets = [cls, xyxy]
height = img.shape[0] + border[0] * 2 # shape(h,w,c)
width = img.shape[1] + border[1] * 2
# Center
C = np.eye(3)
C[0, 2] = -img.shape[1] / 2 # x translation (pixels)
C[1, 2] = -img.shape[0] / 2 # y translation (pixels)
# Perspective
P = np.eye(3)
P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
# Rotation and Scale
R = np.eye(3)
a = random.uniform(-degrees, degrees)
# a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
s = random.uniform(1 - scale, 1.1 + scale)
# s = 2 ** random.uniform(-scale, scale)
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
# Shear
S = np.eye(3)
S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
# Translation
T = np.eye(3)
T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels)
T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels)
# Combined rotation matrix
M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
if perspective:
img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
else: # affine
img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
# Visualize
# import matplotlib.pyplot as plt
# ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
# ax[0].imshow(img[:, :, ::-1]) # base
# ax[1].imshow(img2[:, :, ::-1]) # warped
# Transform label coordinates
n = len(targets)
xy = np.ones((n * 4, 3))
xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = xy @ M.T # transform
xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# clip
new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
# filter candidates
i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.10)
targets = targets[i]
targets[:, 1:5] = new[i]
return img, targets
def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
dtype = img.dtype # uint8
x = np.arange(0, 256, dtype=np.int16)
lut_hue = ((x * r[0]) % 180).astype(dtype)
lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed
class LoadImagesAndLabels(Dataset):
def __init__(self,path,img_size=640,batch_size=16,augment=False,image_weights=False, single_cls=False, stride=32, pad=0.0):
self.img_size=img_size
self.augment=augment
self.image_weights=image_weights
self.stride=stride
self.path=path
try:
f=[]
for p in path if isinstance(path,list) else[path]:
p = Path(p) # os-agnostic
f += glob.glob(str(p / '**' / '*.*'), recursive=True)
self.img_files = sorted([x.replace('/', os.sep) for x in f])
except Exception as e:
pass
self.label_files = img2label_paths(self.img_files) # labels
cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')
if cache_path.is_file():
cache, exists = torch.load(cache_path), True # load
else:
cache, exists = self.cache_labels(cache_path), False # cache
labels, shapes, self.segments = zip(*cache.values())
self.labels = list(labels)
self.shapes = np.array(shapes, dtype=np.float64)
self.img_files = list(cache.keys()) # update
self.label_files = img2label_paths(cache.keys()) # update
if single_cls:
for x in self.labels:
x[:, 0] = 0
n=len(shapes)
bi=np.floor(np.arange(n)/batch_size).astype(np.int32)
nb=bi[-1]+1
self.batch=bi
self.n=n
self.indices=range(n)
self.imgs=[None]*n
def __len__(self):
return len(self.img_files)
def __getitem__(self, item):
index=self.indices[item]
img,(h0,w0),(h,w)=load_image(self,index)
shape=self.img_size
img, ratio, pad = letterbox(img, shape, scaleup=self.augment) # load_image already resized the long side to img_size, so letterbox only adds padding here
shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
labels = self.labels[index].copy()
if labels.size:
labels[:,1:]=xywhn2xyxy(labels[:,1:],ratio[0]*w,ratio[1]*h,padw=pad[0],padh=pad[1])
if self.augment:
# Augment imagespace
img, labels = random_perspective(img, labels,
degrees=0.0,
translate=0.2,
scale=0.9,
shear=0.0,
perspective=0.0)
# img, labels = self.albumentations(img, labels)
# Augment colorspace
augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4)
nL = len(labels) # number of labels
if nL:
labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh
labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1
labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1
if self.augment:
# flip left-right
if random.random() < 0.5:
img = np.fliplr(img)
if nL:
labels[:, 1] = 1 - labels[:, 1]
labels_out = torch.zeros((nL, 6))
if nL:
labels_out[:, 1:] = torch.from_numpy(labels)
# Convert
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, HWC to CHW
img = np.ascontiguousarray(img)
return torch.from_numpy(img), labels_out, self.img_files[index], shapes
@staticmethod
def collate_fn(batch):
img, label, path, shapes = zip(*batch) # transposed
for i, l in enumerate(label):
l[:, 0] = i # add target image index for build_targets()
return torch.stack(img, 0), torch.cat(label, 0), path, shapes
def cache_labels(self, cache_path):
x={}
pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
for i ,(im_file,lb_file) in enumerate(pbar):
try:
im=Image.open(im_file)
im.verify()
shape = im.size # image size
segments = [] # instance segments
if os.path.isfile(lb_file):
with open(lb_file,'r')as f:
j = json.load(f)
img_h = j["imageHeight"]
img_w = j["imageWidth"]
classes = np.array([0 for x in j["shapes"] if x["label"]=="sack"], dtype=np.float32)
segments = [np.array(x["points"], dtype=np.float32).reshape(-1, 2) for x in j["shapes"] ]
l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments,img_h,img_w)), 1)
l = np.array(l, dtype=np.float32)
else:
l = np.zeros((0, 5), dtype=np.float32)
x[im_file] = [l, shape, segments]
except Exception as e:
pass
pbar.close()
path = str(cache_path).replace('/', os.sep)
torch.save(x, path) # save for next time
return x
def create_dataloader(path, imgsz, batch_size, stride,single_cls, augment=True, pad=0.0,world_size=1, workers=8, image_weights=False):
dataset = LoadImagesAndLabels(path, imgsz, batch_size,
augment=augment, # augment images
single_cls=single_cls,
stride=int(stride),
pad=pad,
image_weights=image_weights)
batch_size = min(batch_size, len(dataset))
nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers])
loader = torch.utils.data.DataLoader
dataloader = loader(dataset,
batch_size=batch_size,
num_workers=nw,
sampler=None,
pin_memory=True,
collate_fn=LoadImagesAndLabels.collate_fn)
return dataloader, dataset
img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo']
# path='./data/kongdong/images'
# create_dataloader(path,640,1,32,True)
class LoadImages:
def __init__(self,path,img_size=640):
p = str(Path(path).absolute()) # os-agnostic absolute path
if '*' in p:
files = sorted(glob.glob(p, recursive=True)) # glob
elif os.path.isdir(p):
files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir
elif os.path.isfile(p):
files = [p] # files
else:
raise Exception(f'ERROR: {p} does not exist')
images = [x for x in files if x.split('.')[-1].lower() in img_formats]
ni = len(images)
self.img_size=img_size
self.files=images
self.nf=ni
def __iter__(self):
self.count=0
return self
def __next__(self):
if self.count==self.nf:
raise StopIteration
path = self.files[self.count]
self.count+=1
img0=cv2.imread(path)
img=letterbox(img0,self.img_size)[0]
img=img[:,:,::-1].transpose(2,0,1)
img=np.ascontiguousarray(img)
return path,img,img0
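Before moving on to the loss, it helps to keep the target layout in mind: __getitem__ returns one row per object as [0, class, x, y, w, h] with xywh normalised to the padded image, and collate_fn then writes each image's index within the batch into column 0. A tiny made-up example:

import torch
# two images in a batch, three objects in total; columns: image_idx, cls, x, y, w, h (normalised)
targets = torch.tensor([[0., 0., 0.50, 0.40, 0.20, 0.30],
                        [0., 0., 0.25, 0.60, 0.10, 0.15],
                        [1., 0., 0.70, 0.30, 0.40, 0.25]])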
I added my own comments, and removed the cls loss because I only have one class.
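A short numeric sketch of the box decoding that both IDetect.forward and the loss below rely on (the values are made up; anch is the anchor already divided by the stride, as done in Model.__init__):

import torch

stride = 8.                                # P3 level
grid_xy = torch.tensor([5., 7.])           # gi, gj: the cell the prediction is attached to
anch = torch.tensor([12., 16.]) / stride   # anchor in grid units
raw = torch.tensor([0.2, -0.3, 0.1, 0.4])  # raw network outputs for x, y, w, h

xy = (raw[:2].sigmoid() * 2. - 0.5 + grid_xy) * stride  # center in input pixels; offset limited to (-0.5, 1.5) cells
wh = (raw[2:].sigmoid() * 2) ** 2 * anch * stride       # wh in input pixels; limited to (0, 4) x anchor, matching the ratio<4 filter in find_3_positive
print(xy, wh)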
# coding=utf-8
import os
import sys
import math
import numpy as np
import torch
import torch.nn.functional as F
path = os.path.dirname(__file__)
sys.path.append(path)
import torch.nn as nn
'''
Author:Don
date:2022/10/21 14:28
desc:
'''
def xywh2xyxy(x):
y=x.clone() if isinstance(x,torch.Tensor) else np.copy(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
return y
def box_iou(box1, box2):
def box_area(box):
# box = 4xn
return (box[2] - box[0]) * (box[3] - box[1])
area1=box_area(box1.T)
area2=box_area(box2.T)
# inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter)
def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
box2=box2.T
if x1y1x2y2: # x1, y1, x2, y2 = box1
b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
else: # transform from xywh to xyxy
b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
# Intersection area
inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
(torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
# Union Area
w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
union = w1 * h1 + w2 * h2 - inter + eps
iou = inter / union
if GIoU or DIoU or CIoU:
cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width
ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height
if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared
rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
(b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared
if DIoU:
return iou - rho2 / c2 # DIoU
elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / (h2 + eps)) - torch.atan(w1 / (h1 + eps)), 2)
with torch.no_grad():
alpha = v / (v - iou + (1 + eps))
return iou - (rho2 / c2 + v * alpha) # CIoU
else: # GIoU https://arxiv.org/pdf/1902.09630.pdf
c_area = cw * ch + eps # convex area
return iou - (c_area - union) / c_area # GIoU
else:
return iou # IoU
class ComputeLossOTA:
def __init__(self,model,autobalance=False):
super(ComputeLossOTA, self).__init__()
device=next(model.parameters()).device
BCEcls=nn.BCEWithLogitsLoss(pos_weight=torch.tensor(1.0,device=device))
BCEobj=nn.BCEWithLogitsLoss(pos_weight=torch.tensor(1.0,device=device))
det=model.idetect
self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02]) # P3-P7
self.BCEcls,self.BCEobj=BCEcls,BCEobj
for k in 'na','nc','nl','anchors','stride':
setattr(self,k,getattr(det,k))
def __call__(self, p,targets,imgs):
device=targets.device
lcls,lbox,lobj=torch.zeros(1,device=device),torch.zeros(1,device=device),torch.zeros(1,device=device)
bs, as_, gjs, gis, targets, anchors = self.build_targets(p, targets, imgs)
pre_gen_gains=[torch.tensor(pp.shape,device=device)[[3,2,3,2]] for pp in p] # [80,80,80,80,][40,40,40,40,][20,20,20,20]
#loss
for i,pi in enumerate(p):
b, a, gj, gi = bs[i], as_[i], gjs[i], gis[i] # image, anchor, gridy, gridx
tobj = torch.zeros_like(pi[..., 0], device=device) # target obj
n = b.shape[0] # number of targets
if n:
ps=pi[b,a,gj,gi]
# box regression
grid=torch.stack([gi,gj],dim=1)
pxy=ps[:,:2].sigmoid()*2-0.5
pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
pbox = torch.cat((pxy, pwh), 1) # predicted box
selected_tbox = targets[i][:, 2:6] * pre_gen_gains[i] # xywh * 80,40,20
selected_tbox[:, :2] -= grid # the prediction is the center offset from its grid cell, so subtract the cell's xy from the target as well
iou = bbox_iou(pbox.T, selected_tbox, x1y1x2y2=False, CIoU=True) # iou(prediction, target)
lbox += (1.0 - iou).mean() # iou loss
# Objectness
tobj[b, a, gj, gi]=iou.detach().clamp(0).type(tobj.dtype) # use the IoU (clamped to [0, 1]) as the objectness target
obji=self.BCEobj(pi[...,4],tobj) # objectness loss for this level
lobj += obji * self.balance[i] # each level is weighted differently; the high-resolution level gets a larger weight to help small objects
lbox *= 0.05
lobj *= 0.7
bs=tobj.shape[0]
loss=lbox+lobj
return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()
def build_targets(self, p, targets, imgs):
indices, anch = self.find_3_positive(p, targets) # expand every ground-truth box into several candidate positive cells/anchors
matching_bs = [[] for pp in p]
matching_as = [[] for pp in p]
matching_gjs = [[] for pp in p]
matching_gis = [[] for pp in p]
matching_targets = [[] for pp in p]
matching_anchs = [[] for pp in p]
nl = len(p)
for batch_idx in range(p[0].shape[0]):
b_idx=targets[:,0]==batch_idx
this_target=targets[b_idx]
if this_target.shape[0] == 0:
continue
txywh=this_target[:,2:6]*imgs[batch_idx].shape[1]
txyxy=xywh2xyxy(txywh)
pxyxys = []
p_cls = []
p_obj = []
from_which_layer = []
all_b = []
all_a = []
all_gj = []
all_gi = []
all_anch = []
for i,pi in enumerate(p): # loop over the predictions of each level
b, a, gj, gi = indices[i] # image, anchor and grid-cell indices of this level's candidate positives
idx = (b == batch_idx)
b, a, gj, gi = b[idx], a[idx], gj[idx], gi[idx]
all_b.append(b)
all_a.append(a)
all_gj.append(gj)
all_gi.append(gi)
all_anch.append(anch[i][idx])
from_which_layer.append(torch.ones(size=(len(b),)) * i)
fg_pred = pi[b, a, gj, gi] # b: batch, a: anchor, gj/gi: grid cell; picks the candidate predictions out of the (1, 3, 80, 80, 5+nc) map
p_obj.append(fg_pred[:, 4:5])
p_cls.append(fg_pred[:, 5:6])
grid = torch.stack([gi, gj], dim=1) # grid cells that contain the object centers
pxy=(fg_pred[:,:2].sigmoid()*2-0.5+grid)*self.stride[i] # the network predicts the center offset relative to the grid cell
pwh = (fg_pred[:, 2:4].sigmoid() * 2) ** 2 * anch[i][idx] * self.stride[i] # the network predicts wh as a multiple of the anchor
pxywh=torch.cat([pxy,pwh],dim=-1)
pxyxy=xywh2xyxy(pxywh)
pxyxys.append(pxyxy)
pxyxys=torch.cat(pxyxys,dim=0)
if pxyxys.shape[0] == 0:
continue
p_obj = torch.cat(p_obj, dim=0)
p_cls = torch.cat(p_cls, dim=0)
from_which_layer = torch.cat(from_which_layer, dim=0)
all_b = torch.cat(all_b, dim=0)
all_a = torch.cat(all_a, dim=0)
all_gj = torch.cat(all_gj, dim=0)
all_gi = torch.cat(all_gi, dim=0)
all_anch = torch.cat(all_anch, dim=0)
pair_wise_iou=box_iou(txyxy,pxyxys)
pair_wise_iou_loss=-torch.log(pair_wise_iou+1e-8)
top_k,_=torch.topk(pair_wise_iou,min(10,pair_wise_iou.shape[1]),dim=1) # take at most the 10 largest IoUs per ground truth
dynamic_ks=torch.clamp(top_k.sum(1).int(),min=1) # dynamic k per gt = sum of its top IoUs, at least 1 (e.g. with 5 gts this might give [4,3,3,4,4])
gt_cls_per_image=(F.one_hot(this_target[:,1].to(torch.int64),self.nc).float().unsqueeze(1).repeat(1,pxyxys.shape[0],1)) #5,60,1
num_gt=this_target.shape[0] # 5
cls_preds_=(p_cls.float().unsqueeze(0).repeat(num_gt,1,1).sigmoid_()*p_obj.unsqueeze(0).repeat(num_gt,1,1).sigmoid_()) #5,60,1
y=cls_preds_.sqrt_()
pair_wise_cls_loss=F.binary_cross_entropy_with_logits(torch.log(y/(1-y)),gt_cls_per_image,reduction="none").sum(-1)
del cls_preds_
cost=(pair_wise_cls_loss+3.0*pair_wise_iou_loss) # (num_gt, num_candidates) cost matrix over the candidates picked by find_3_positive across the 3 levels
matching_matrix=torch.zeros_like(cost) # 5,60
for gt_idx in range(num_gt):
_,pos_idx=torch.topk(cost[gt_idx],k=dynamic_ks[gt_idx].item(),largest=False) # pick the k candidates with the smallest cost
matching_matrix[gt_idx][pos_idx]=1.0
del top_k,dynamic_ks
anchor_matching_gt=matching_matrix.sum(0)
# a column sum > 1 means one candidate was assigned to more than one gt
if (anchor_matching_gt>1).sum()>0:
_,cost_argmin=torch.min(cost[:,anchor_matching_gt>1],dim=0) # find the gt with the smallest cost for that candidate
matching_matrix[:, anchor_matching_gt > 1] *= 0.0 # clear all assignments for that candidate
matching_matrix[cost_argmin, anchor_matching_gt > 1] = 1.0 # keep only the lowest-cost gt
fg_mask_inboxes=matching_matrix.sum(0)>0.0 # final positives; may be fewer than dynamic_ks because of the deduplication above
matched_gt_inds = matching_matrix[:, fg_mask_inboxes].argmax(0) #
from_which_layer = from_which_layer[fg_mask_inboxes]
all_b = all_b[fg_mask_inboxes]
all_a = all_a[fg_mask_inboxes]
all_gj = all_gj[fg_mask_inboxes]
all_gi = all_gi[fg_mask_inboxes]
all_anch = all_anch[fg_mask_inboxes]
this_target=this_target[matched_gt_inds]
for i in range(nl):
layer_idx=from_which_layer==i
matching_bs[i].append(all_b[layer_idx])
matching_as[i].append(all_a[layer_idx])
matching_gis[i].append(all_gi[layer_idx])
matching_gjs[i].append(all_gj[layer_idx])
matching_targets[i].append(this_target[layer_idx])
matching_anchs[i].append(all_anch[layer_idx])
for i in range(nl):
if matching_targets[i] != []:
matching_bs[i] = torch.cat(matching_bs[i], dim=0)
matching_as[i] = torch.cat(matching_as[i], dim=0)
matching_gjs[i] = torch.cat(matching_gjs[i], dim=0)
matching_gis[i] = torch.cat(matching_gis[i], dim=0)
matching_targets[i] = torch.cat(matching_targets[i], dim=0)
matching_anchs[i] = torch.cat(matching_anchs[i], dim=0)
else:
matching_bs[i] = torch.tensor([], device='cuda:0', dtype=torch.int64)
matching_as[i] = torch.tensor([], device='cuda:0', dtype=torch.int64)
matching_gjs[i] = torch.tensor([], device='cuda:0', dtype=torch.int64)
matching_gis[i] = torch.tensor([], device='cuda:0', dtype=torch.int64)
matching_targets[i] = torch.tensor([], device='cuda:0', dtype=torch.int64)
matching_anchs[i] = torch.tensor([], device='cuda:0', dtype=torch.int64)
return matching_bs, matching_as, matching_gjs, matching_gis, matching_targets, matching_anchs
def find_3_positive(self, p, targets):
na,nt=self.na,targets.shape[0] # nt: number of target boxes (e.g. 5)
indices,anch=[],[]
gain=torch.ones(7,device=targets.device).long()
ai=torch.arange(na,device=targets.device).float().view(na,1).repeat(1,nt) # ai=[[0,0,0,0,0],[1,1,1,1,1],[2,2,2,2,2]]
targets=torch.cat((targets.repeat(na,1,1),ai[:,:,None]),2) # shape (na, nt, 7): image_idx, cls, x, y, w, h, anchor_idx
g=0.5
off = torch.tensor([[0, 0],[1, 0], [0, 1], [-1, 0], [0, -1]], device=targets.device).float() * g # offsets
# p [[1,3,80,80,6],[1,3,40,40,6],[1,3,20,20,6] ]
for i in range(self.nl): # the 3 detection levels
anchors=self.anchors[i] # the 3 anchors of this level
gain[2:6]=torch.tensor(p[i].shape)[[3,2,3,2]] # xyxy gain # 1,1,80,80,80,80,1
t=targets*gain # scale the normalised targets to this level's grid (e.g. x80)
if nt:
r=t[:,:,4:6]/anchors[:,None] # wh ratio between the gt boxes and the anchors
j = torch.max(r, 1. / r).max(2)[0] < 4.0 # keep gt/anchor pairs whose ratio is below 4
t = t[j] # filter
gxy=t[:,2:4]
gxi = gain[[2, 3]] - gxy # inverse coordinates: grid size minus the gt center, i.e. the distance from the bottom-right corner
j, k = ((gxy % 1. < g) & (gxy > 1.)).T # if the center falls in the left/top half of its cell (and not in the first column/row), also use the cell to the left / above
l, m = ((gxi % 1. < g) & (gxi > 1.)).T # likewise for the right/bottom half: also use the cell to the right / below
j = torch.stack((torch.ones_like(j), j, k, l, m)) # selection mask: the center cell plus the chosen neighbour cells
t = t.repeat((5, 1, 1))[j] # replicate each target 5 times (center + 4 neighbours) and keep the selected copies
offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] # matching cell offsets for each kept copy
else:
t=targets[0]
offsets=0
b,c=t[:,:2].long().T # image index, class
gxy=t[:,2:4]
gwh=t[:,4:6]
gij=(gxy-offsets).long()
gi,gj=gij.T
a=t[:,6].long()
indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices
anch.append(anchors[a]) # anchors
return indices,anch
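To make the dynamic-k assignment in build_targets more concrete, here is a tiny made-up example with 2 ground-truth boxes and 4 candidate predictions (only the IoU part of the cost is used in this sketch):

import torch

pair_wise_iou = torch.tensor([[0.80, 0.60, 0.10, 0.05],
                              [0.30, 0.70, 0.65, 0.20]])          # (num_gt, num_candidates)
top_k, _ = torch.topk(pair_wise_iou, min(10, pair_wise_iou.shape[1]), dim=1)
dynamic_ks = torch.clamp(top_k.sum(1).int(), min=1)               # here tensor([1, 1]): each gt gets 1 positive
cost = -torch.log(pair_wise_iou + 1e-8)                           # stand-in for the IoU part of the cost
matching_matrix = torch.zeros_like(cost)
for gt_idx in range(cost.shape[0]):
    _, pos_idx = torch.topk(cost[gt_idx], k=dynamic_ks[gt_idx].item(), largest=False)  # lowest-cost candidates
    matching_matrix[gt_idx][pos_idx] = 1.0
print(matching_matrix)  # gt 0 -> candidate 0, gt 1 -> candidate 1; no column conflicts to resolve here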
I don't train on multiple GPUs, so I also removed the distributed-training code; the original is hopelessly complicated, with a pile of parameters.
# coding=utf-8
import os
import sys
from tqdm import tqdm
path = os.path.dirname(__file__)
sys.path.append(path)
from model import Model
import torch
import torch.optim as optim
import torch.nn as nn
import torch.optim.lr_scheduler as lr_scheduler
import numpy as np
from datasets import create_dataloader
from torch.cuda import amp
from loss import ComputeLossOTA
'''
Author:Don
date:2022/10/19 12:01
desc:
'''
def train():
epochs=300
imgsz=640
batch_size=1
train_path='./data/kongdong/images'
classes=1
# Optimizer
nbs = 64 # nominal batch size
accumulate = max(round(nbs / batch_size), 1) # accumulate gradients over this many batches before stepping the optimizer
device = torch.device('cuda:0')
anchors = [[12, 16, 19, 36, 40, 28], [36, 75, 76, 55, 72, 146], [142, 110, 192, 243, 459, 401]]
yolov7 = Model(classes=classes, anchors=anchors).to(device)
pg0, pg1, pg2 = [], [], [] # optimizer parameter groups
for k, v in yolov7.named_modules():
if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
pg2.append(v.bias) # biases
if isinstance(v, nn.BatchNorm2d):
pg0.append(v.weight) # no decay
elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
pg1.append(v.weight) # apply decay
if hasattr(v, 'im'):
if hasattr(v.im, 'implicit'):
pg0.append(v.im.implicit)
else:
for iv in v.im:
pg0.append(iv.implicit)
optimizer = optim.SGD(pg0, lr=0.01, momentum=0.937, nesterov=True)
optimizer.add_param_group({'params': pg1, 'weight_decay': 0.0005}) # add pg1 with weight_decay
optimizer.add_param_group({'params': pg2}) # add pg2 (biases)
del pg0, pg1, pg2
lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - 0.1) + 0.1 # linear
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
gs=max(int(yolov7.stride.max()),32) #32
nl=yolov7.idetect.nl
mloss = torch.zeros(4, device=device) # mean losses
single_cls=False
# Trainloader
if classes ==1 :
single_cls=True
dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs,single_cls, augment=True,image_weights=True)
mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class
nb = len(dataloader)
compute_loss_ota = ComputeLossOTA(yolov7) # init loss class
scaler = amp.GradScaler(enabled=(device.type == 'cuda'))
for epoch in range(0, epochs): # epoch ------------------------------------------------------------------
yolov7.train()
pbar = enumerate(dataloader)
pbar = tqdm(pbar, total=nb) # progress bar
optimizer.zero_grad()
for i, (imgs, targets, paths, _) in pbar: # batch -------------------------------------------------------------
ni = i + nb * epoch # number integrated batches (since train start)
imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0
# Forward
with amp.autocast(enabled=True):
pred = yolov7(imgs) # forward
# print(pred[0].shape)
loss, loss_items = compute_loss_ota(pred, targets.to(device), imgs) # loss scaled by batch_size
# if rank != -1:
# loss *= opt.world_size # gradient averaged between devices in DDP mode
# if opt.quad:
# loss *= 4.
# Backward
scaler.scale(loss).backward()
# Optimize
if ni % accumulate == 0:
scaler.step(optimizer) # optimizer.step
scaler.update()
optimizer.zero_grad()
mloss = (mloss * i + loss_items) / (i + 1) # update mean losses
mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB)
s = ('%10s' * 2 + '%10.4g' * 6) % (
'%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
pbar.set_description(s)
# Scheduler
lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard
scheduler.step()
torch.save(yolov7,'last.pt')
train()
# coding=utf-8
import os
import sys
path = os.path.dirname(__file__)
sys.path.append(path)
import torch
from datasets import LoadImages,non_max_suppression
'''
Author:Don
date:2022/10/26 14:25
desc:
'''
from pathlib import Path
source='data/kongdong/images'
imgsz=640
dataset = LoadImages(source, img_size=imgsz)
device=torch.device('cuda:0')
def clip_coords(boxes, img_shape):
# Clip bounding xyxy bounding boxes to image shape (height, width)
boxes[:, 0].clamp_(0, img_shape[1]) # x1
boxes[:, 1].clamp_(0, img_shape[0]) # y1
boxes[:, 2].clamp_(0, img_shape[1]) # x2
boxes[:, 3].clamp_(0, img_shape[0]) # y2
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
# Rescale coords (xyxy) from img1_shape to img0_shape
if ratio_pad is None: # calculate from img0_shape
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
else:
gain = ratio_pad[0][0]
pad = ratio_pad[1]
coords[:, [0, 2]] -= pad[0] # x padding
coords[:, [1, 3]] -= pad[1] # y padding
coords[:, :4] /= gain
clip_coords(coords, img0_shape)
return coords
import random
import cv2
def plot_one_box(x, img, color=None, label=None, line_thickness=3):
# Plots one bounding box on image img
tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness
color = color or [random.randint(0, 255) for _ in range(3)]
c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
if label:
tf = max(tl - 1, 1) # font thickness
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled
cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
def detect():
model = torch.load("last.pt", map_location=device) # load FP32 model
model.eval()
for path, img, im0s in dataset:
img = torch.from_numpy(img).to(device)
img=img.unsqueeze(0)
img = img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
pred = model(img)[0]
pred = non_max_suppression(pred, 0.85, 0.4, classes=1)
for i,det in enumerate(pred):
p, s, im0= path, '', im0s
p = Path(p) # to Path
save_path = str(p.name) # img.jpg
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
if len(det):
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += f"{n} {'box'}{'s' * (n > 1)}, " # add to string
# Write results
for *xyxy, conf, cls in reversed(det):
label = f'{conf:.2f}'
plot_one_box(xyxy, im0)
cv2.imshow(str(p), im0)
cv2.waitKey(0) # wait for a key press before showing the next image
detect()
This is only meant to help understand YOLOv7. It cannot be used for real training, and the results are terrible!!