python 3.8
mxnet 1.7.0
cuda 10.1
这时候,需要引入图像领域另一个重要任务:物体的检测与识别。在传统机器学习领域,一个典型的案例是利用HOG(Histogram of Oriented Gradients,方向梯度直方图)特征来生成各种物体相应的“滤波器”。HOG滤波器能完整地记录物体的边缘和轮廓信息;利用这一滤波器过滤不同图片的不同位置,当输出响应值的幅度超过一定阈值时,就认为滤波器与图片中的物体匹配程度较高,从而完成物体的检测。
pip install labelimg
import os,sys,cv2,json,colorsys,time
import mxnet as mx
import numpy as np
from mxnet import nd, autograd, gluon
from data.data_params import Create_pascol_voc_Data
from data.data_loader import YoloV5DataSet
from core.loss import ComputeLoss, build_targets
from nets.yolo import yolov5
from utils.utils_data import non_max_suppression, scale_coords, xywh2xyxy, process_batch, ap_per_class
from PIL import Image,ImageDraw,ImageFont
# Turn the comma-separated GPU id string into MXNet GPU contexts; when the
# string lists no ids, fall back to a single CPU context.
gpu_contexts = [mx.gpu(int(device_id)) for device_id in USEGPU.split(',') if device_id.strip()]
self.ctx = gpu_contexts or [mx.cpu()]
class YoloV5DataSet(gluon.data.Dataset):
    """Detection dataset that returns 4-tile mosaic samples.

    Images are listed from ``<DataDir>/images/<mode>_data``; the labels of an
    image are read from ``<DataDir>/labels/<mode>_data/<image stem>.txt``.
    Label rows are loaded as plain numbers; ``__getitem__`` treats column 0 as
    the class id and columns 1-4 as a normalized ``xywh`` box.

    Args:
        DataDir: dataset root directory.
        classes: number of classes (length of ``self.weight``).
        img_sizes: side length images are resized to before mosaicking.
        shuffle: stored on the instance as ``self.shuffle``.
        mode: sub-folder prefix, e.g. ``'train'`` selects ``train_data``.
        balance_classes: when True, ``self.weight`` holds inverse
            square-root class-frequency weights instead of uniform ones.
    """

    def __init__(self, DataDir='./dataset/train', classes=80, img_sizes=640, shuffle=True, mode='train', balance_classes=False):
        super(YoloV5DataSet, self).__init__()
        img_dir = os.path.join(DataDir, "images", mode + "_data")
        lbl_dir = os.path.join(DataDir, "labels", mode + "_data")

        self.img_files = []
        self.lbl_files = []
        for f in os.listdir(img_dir):
            img_path = os.path.join(img_dir, f)
            if not os.path.isfile(img_path):
                # Skip sub-directories and other non-file entries.
                continue
            self.img_files.append(img_path)
            # splitext keeps dots inside the stem ("a.b.jpg" -> "a.b.txt").
            self.lbl_files.append(os.path.join(lbl_dir, os.path.splitext(f)[0] + ".txt"))
        self.len = len(self.img_files)

        self.images = []  # kept for compatibility; images are loaded lazily in load_img()
        self.labels = [self._read_labels(lbl_name) for lbl_name in self.lbl_files]

        #self.batch = batch_size
        self.shape = (img_sizes, img_sizes)
        self.img_size = img_sizes  # read by load_img() and __getitem__()
        self.mosaic_border = [-img_sizes // 2, -img_sizes // 2]
        self.path = DataDir
        self.shuffle = shuffle

        # Class id of every labelled box in the dataset (empty when there are none).
        if self.labels:
            self.classes = np.concatenate(self.labels, axis=0)[:, 0]
        else:
            self.classes = np.zeros((0,), dtype="float64")

        # NOTE(review): the source computed frequency-based weights and then
        # unconditionally replaced them with uniform ones. Uniform stays the
        # default so existing callers see the same values; the computed
        # variant is opt-in. Confirm which behavior was intended.
        self.weight = np.ones((classes), dtype="float32")
        if balance_classes and self.classes.size:
            bincount = np.sqrt(np.bincount(self.classes.astype("int32"), minlength=classes) + 10)
            weight = 1. / bincount
            self.weight = (weight * classes / np.sum(weight)).astype("float32")

    @staticmethod
    def _read_labels(lbl_name):
        """Return the label rows of one file as a 2-D float64 array ((0, 5) if none)."""
        empty = np.zeros((0, 5), dtype="float64")
        if not os.path.exists(lbl_name):
            return empty
        # ndmin=2 keeps a single-row file as shape (1, n) instead of (n,).
        lbl_data = np.loadtxt(lbl_name, dtype="float64", ndmin=2)
        return lbl_data if lbl_data.size else empty

    def __len__(self):
        return len(self.img_files)

    def load_img(self, i):
        """Read image ``i`` and resize it so its longer side equals ``self.img_size``.

        Returns:
            ``(img, (h0, w0), (h, w))``: the resized image, its original
            height/width and its resized height/width.

        Raises:
            IOError: if OpenCV cannot read the file.
        """
        img = cv2.imread(self.img_files[i])
        if img is None:
            raise IOError("cannot read image: {}".format(self.img_files[i]))
        h0, w0 = img.shape[:2]
        r = self.img_size / max(h0, w0)
        if r != 1:
            # max(1, ...) keeps the short side of extremely elongated images non-zero.
            new_size = (max(1, int(w0 * r)), max(1, int(h0 * r)))
            img = cv2.resize(img, new_size, interpolation=cv2.INTER_CUBIC)
        return img, (h0, w0), img.shape[:2]

    def __getitem__(self, index):
        """Build one mosaic training sample around image ``index``.

        The requested image and three randomly chosen ones are pasted around a
        random centre on a ``2*img_size`` canvas, then passed through
        ``random_perspective``, HSV augmentation and a random left-right flip.

        Returns:
            ``(img, labels_out)``: ``img`` is the channels-first image array;
            ``labels_out`` has shape ``(n, 6)`` with column 0 left at zero
            (presumably filled with the batch index by the collate step —
            TODO confirm), column 1 the class id and columns 2-5 the
            normalized ``xywh`` box.
        """
        s = self.img_size
        yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border)
        indices = [index] + [random.choice(range(self.len)) for _ in range(3)]

        labels4 = []
        img4 = None
        for i, tile_index in enumerate(indices):
            #img, _, (h,w) = self.images[tile_index]
            img, _, (h, w) = self.load_img(tile_index)

            # place img in img4: "a" coordinates index the canvas, "b" the tile
            if i == 0:  # top left
                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc
                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h
            elif i == 1:  # top right
                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
            elif i == 2:  # bottom left
                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
            else:  # i == 3, bottom right
                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
            padw = x1a - x1b
            padh = y1a - y1b

            labels = self.labels[tile_index].copy()
            if labels.size > 0:
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
            labels4.append(labels)

        # Concat/clip labels
        labels4 = np.concatenate(labels4, 0)
        labels4[:, 1:] = np.clip(labels4[:, 1:], 0, 2 * s)  # clip when using random_perspective()
        img, labels = random_perspective(img4, labels4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, border=self.mosaic_border)

        nl = len(labels)
        if nl:
            labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3)

        # HSV color-space
        augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4)

        # Flip left-right; the normalized x centre is mirrored with the image
        if random.random() < 0.5:
            img = np.fliplr(img)
            if nl:
                labels[:, 1] = 1 - labels[:, 1]

        labels_out = np.zeros((nl, 6))
        if nl:
            labels_out[:, 1:] = np.array(labels)

        # Convert HWC -> CHW
        img = img.transpose((2, 0, 1))
        img = np.ascontiguousarray(img)
        return img, labels_out
class yolov5(HybridBlock):
    """YOLOv5 network assembled from ``conv``, ``c3_rep*``, ``sppf``, ``resize``, ``cat`` and ``detect`` blocks.

    ``gw`` multiplies every channel count and ``gd`` multiplies the number in
    the ``c3_rep<N>`` block name that is looked up. The forward pass hands
    three feature maps to the ``detect`` block.
    """

    def __init__(self,num_classes, batch_size = 16, mode="train", ctx=mx.cpu(), act="silu", gd=1, gw=1):
        super(yolov5, self).__init__()
        self.ctx = ctx
        self.mode = mode
        self.batch_size = batch_size
        self.act = act

        activation = self.act
        # Channel counts after applying the width multiplier.
        ch16, ch32, ch64, ch128, ch256 = (base * gw for base in (16, 32, 64, 128, 256))

        def c3_block(repeats):
            # Look up the block class named c3_rep<repeats> among the module's names.
            return eval(f'c3_rep{repeats}')

        # Child blocks are created in the same order as they run in hybrid_forward.
        self.conv1 = conv(3, ch16, 6, 2, 2, act=activation)
        self.conv2 = conv(ch16, ch32, 3, 2, 1, act=activation)
        self.c3_1 = c3_block(gd*1)(ch32, ch32, 1, True, group=1, e=0.5, act=activation)
        self.conv3 = conv(ch32, ch64, 3, 2, 1, act=activation)
        self.c3_2 = c3_block(gd*2)(ch64, ch64, 2, True, group=1, e=0.5, act=activation)
        self.conv4 = conv(ch64, ch128, 3, 2, 1, act=activation)
        self.c3_3 = c3_block(gd*3)(ch128, ch128, 3, True, group=1, e=0.5, act=activation)
        self.conv5 = conv(ch128, ch256, 3, 2, 1, act=activation)
        self.c3_4 = c3_block(gd*1)(ch256, ch256, 1, True, group=1, e=0.5, act=activation)
        self.sppf = sppf(ch256, ch256, 5, act=activation)

        self.conv6 = conv(ch256, ch128, 1, 1, act=activation)
        self.upsample1 = resize()
        self.cat1 = cat(dim=1)
        self.c3_5 = c3_block(gd*1)(ch256, ch128, 1, False, group=1, e=0.5, act=activation)
        self.conv7 = conv(ch128, ch64, 1, 1, act=activation)
        self.upsample2 = resize()
        self.cat2 = cat(dim=1)
        self.c3_6 = c3_block(gd*1)(ch128, ch64, 1, False, group=1, e=0.5, act=activation)

        self.conv8 = conv(ch64, ch64, 3, 2, 1, act=activation)
        self.cat3 = cat(dim=1)
        self.c3_7 = c3_block(gd*1)(ch128, ch128, 1, False, group=1, e=0.5, act=activation)
        self.conv9 = conv(ch128, ch128, 3, 2, 1, act=activation)
        self.cat4 = cat(dim=1)
        self.c3_8 = c3_block(gd*1)(ch256, ch256, 1, False, group=1, e=0.5, act=activation)

        anchors = [[10,13, 16,30, 33,23],[30,61, 62,45, 59,119],[116,90, 156,198, 373,326]]
        self.det = detect(
            self.batch_size,
            nc=num_classes,
            anchors=anchors,
            ch=[ch64, ch128, ch256],
            inplace=True,
            mode=self.mode,
            ctx=self.ctx,
        )

    def hybrid_forward(self, F, x):
        """Run the network on ``x`` and return the output of the ``detect`` block."""
        # Layers 0-9: conv / C3 chain ending in SPPF; outputs 4 and 6 are reused below.
        stem = self.c3_1(self.conv2(self.conv1(x)))        # 0-2
        skip4 = self.c3_2(self.conv3(stem))                # 3-4
        skip6 = self.c3_3(self.conv4(skip4))               # 5-6
        deepest = self.sppf(self.c3_4(self.conv5(skip6)))  # 7-9

        # Layers 10-17: reduce, upsample and concatenate with the saved outputs.
        reduced10 = self.conv6(deepest)                    # 10
        merged = self.cat1(self.upsample1(reduced10), skip6)   # 11-12
        reduced14 = self.conv7(self.c3_5(merged))          # 13-14
        merged = self.cat2(self.upsample2(reduced14), skip4)   # 15-16
        head17 = self.c3_6(merged)                         # 17

        # Layers 18-23: convolve and concatenate with the reduced maps.
        merged = self.cat3(self.conv8(head17), reduced14)  # 18-19
        head20 = self.c3_7(merged)                         # 20
        merged = self.cat4(self.conv9(head20), reduced10)  # 21-22
        head23 = self.c3_8(merged)                         # 23

        return self.det(head17, head20, head23)            # 24
# Training-loop excerpt. NOTE(review): neither the enclosing ``def`` nor the
# epoch loop is visible here, and the lines carry no indentation, so the
# nesting of the statements below is not shown — confirm against the full file.

# Parse the comma-separated decay epochs (e.g. '50,100,150,200') into sorted ints.
lr_steps = sorted([int(ls) for ls in lr_decay_epoch.split(',') if ls.strip()])
lr_decay_epoch = [e for e in lr_steps]
# Convert epochs to iteration counts: epoch * (dataset length // batch size).
lr_decay_epoch = [i*(len(self.train_dataset)//self.batch_size) for i in lr_decay_epoch]
# Step scheduler built from those iteration counts, with `lr_decay` as its factor.
schedule = mx.lr_scheduler.MultiFactorScheduler(step=lr_decay_epoch, factor=lr_decay)
optimizer = mx.optimizer.Adam(learning_rate=learning_rate, lr_scheduler=schedule)
trainer = gluon.Trainer(self.model.collect_params(), optimizer=optimizer)
# Loss object; receives the dataset's per-class weights as `pos_weight` on the first context.
final_loss = ComputeLoss(len(self.classes_names),ctx=self.ctx[0], pos_weight=nd.array(self.train_dataset.weight, ctx=self.ctx[0]))
for i, batch in enumerate(self.train_dataloader):
labels = batch[1]
# NOTE(review): `imgs` is read here but not assigned in the visible lines;
# presumably `imgs = batch[0]` precedes this — confirm.
# Scale the images to float32 in [0, 1] and move them to the first context.
imgs = nd.array(imgs.astype("float32")/255.).as_in_context(self.ctx[0])
with autograd.record():
pred = self.model(imgs)
tcls, tbox, indices, anchors = build_targets(pred, labels, ctx=self.ctx[0], num_classes=len(self.classes_names))
with autograd.record():
loss, lbox, lobj, lcls = final_loss(pred, tcls, tbox, indices, anchors)
# NOTE(review): no `loss.backward()` is visible before the optimizer step — confirm it exists in the full file.
trainer.step(self.batch_size, ignore_stale_grad=True)
# Pull the three loss components back as Python scalars for logging.
with autograd.pause():
lbox_np = lbox.asscalar()
lobj_np = lobj.asscalar()
lcls_np = lcls.asscalar()
# NOTE(review): `{:4f}` / `{:5f}` set a minimum field width, not a precision (`.4f`);
# `schedule.base_lr` is the scheduler's attribute, which may differ from the
# trainer's current learning rate — confirm both are intended.
print("[{}:{}/{}]: loss = {:4f}, lbox = {:4f}, lobj = {:4f}, lcls = {:4f}, lr={:5f}".format(epoch+1,TrainNum,i+1, lbox_np+lobj_np+lcls_np, lbox_np, lobj_np, lcls_np, schedule.base_lr))
# Save step: output directory is `ModelPath + '_yoloV5'`.
# NOTE(review): no directory creation is visible after this check, yet the
# files below are written inside that directory — confirm.
if os.path.exists(ModelPath+'_yoloV5') == False:
# Metadata stored next to the weights: image size, class names, model name.
ClassDict = {}
ClassDict["image_size"] = self.image_size
ClassDict["classes_names"] = self.classes_names
ClassDict["model_name"] = self.model_name
with open(os.path.join(ModelPath+'_yoloV5', "class_index.json"), 'w', encoding='utf-8') as f:
f.write(json.dumps(ClassDict, sort_keys=True, indent=4, separators=(',', ': ')))
self.model.save_parameters(os.path.join(ModelPath+'_yoloV5', "final_model.dat"))
def predict(self,img_cv):
    """Detect objects in one image and draw the detections on a copy of it.

    Args:
        img_cv: image array with shape ``(height, width, channels)`` as used
            by OpenCV. It is not modified.

    Returns:
        ``None`` when no detection is left after non-max suppression.
        Otherwise a dict with:
          * ``"image_result"``: the annotated image, converted with ``self.pillow_cv2``;
          * ``"bbox"``: list of ``[(xmin, ymin, xmax, ymax), class_id, class_name, confidence]``;
          * ``"time"``: elapsed time in milliseconds.
    """
    start_time = time.time()
    height, width = img_cv.shape[0:2]

    # Fit the longer side to image_size and round both sides to multiples of
    # 32; max(1, ...) keeps a very short side from rounding down to 0.
    scale = min(self.image_size/height, self.image_size/width)
    in_w = max(1, round(width*scale/32.))*32
    in_h = max(1, round(height*scale/32.))*32
    img0 = cv2.resize(img_cv, (in_w, in_h))

    # HWC uint8 -> 1xCxHxW float32 in [0, 1] on the first context.
    img = img0.astype("float32")/255.
    img = nd.array(img.transpose((2,0,1))[None], ctx = self.ctx[0])

    pred = self.model(img).asnumpy()
    pred = non_max_suppression(pred, self.conf_thres, self.iou_thres, None, self.agnostic_nms, max_det=self.max_det)
    det = pred[0]
    if det.shape[0] == 0:
        return None
    # Map the boxes from network-input coordinates back to the original image.
    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img_cv.shape).round()

    origin_img_pillow = self.cv2_pillow(img_cv)
    pil_h, pil_w = np.shape(origin_img_pillow)[:2]
    font = ImageFont.truetype(font='./model_data/simhei.ttf', size=int(np.floor(3e-2 * pil_w + 0.5)))
    thickness = max((pil_h + pil_w) // self.image_size, 1)
    draw = ImageDraw.Draw(origin_img_pillow)

    def text_size(text):
        # ImageDraw.textsize was removed in Pillow 10; use textbbox there.
        if hasattr(draw, "textsize"):
            return draw.textsize(text, font)
        _, _, right, bottom = draw.textbbox((0, 0), text, font=font)
        return right, bottom

    imgbox = []
    for *xyxy, conf, cls in reversed(det):
        cls_id = int(cls)
        cls_name = self.classes_names[cls_id]
        score = float(f'{conf:.2f}')
        xmin, ymin, xmax, ymax = int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3])
        imgbox.append([(xmin, ymin, xmax, ymax), cls_id, cls_name, score])

        label = '{}-{}'.format(cls_name, score)
        label_w, label_h = text_size(label)
        # Put the label above the box when it fits, otherwise just inside its top edge.
        if ymin - label_h >= 0:
            text_x, text_y = xmin, ymin - label_h
        else:
            text_x, text_y = xmin, ymin + 1

        color = self.colors[cls_id]
        # Nested 1-pixel rectangles give a border `thickness` pixels wide.
        for i in range(thickness):
            draw.rectangle([xmin + i, ymin + i, xmax - i, ymax - i], outline=color)
        draw.rectangle([(text_x, text_y), (text_x + label_w, text_y + label_h)], fill=color)
        draw.text((text_x, text_y), label, fill=(0, 0, 0), font=font)
    del draw

    result_value = {
        "image_result": self.pillow_cv2(origin_img_pillow),
        "bbox": imgbox,
        "time": (time.time() - start_time) * 1000
    }
    return result_value
if __name__ == '__main__':
    # Training example: build the model on GPU 0 and train it on the dataset.
    ctu = Ctu_YoloV5(USEGPU='0', image_size=640)
    # Raw string: the Windows path contains backslashes that must not be
    # parsed as escape sequences.
    ctu.InitModel(DataDir=r'E:\DL_Project\DataSet\DataSet_Detection\DataSet_Halcon_YaoPian',split_train = 0.9,batch_size = 2,model_name='yolov5s',Pre_Model='./Model_yoloV5/final_model.dat',valFlag=True)
    ctu.train(TrainNum=500,learning_rate=0.00005,lr_decay_epoch='50,100,150,200',lr_decay = 0.9,ModelPath='./Model_x')

    # Inference example (disabled): load a trained model and show the
    # detections for every image under a test folder.
    # ctu = Ctu_YoloV5(USEGPU='0')
    # ctu.LoadModel(r'./Model_x_yoloV5')
    # cv2.namedWindow("result", 0)
    # cv2.resizeWindow("result", 640, 480)
    # index = 0
    # for root, dirs, files in os.walk(r'D:/Ctu/Ctu_Project_DL/DataSet/DataSet_Detection_YaoPian/test'):
    #     for f in files:
    #         img_cv = ctu.read_image(os.path.join(root, f))
    #         if img_cv is None:
    #             continue
    #         res = ctu.predict(img_cv)
    #         for each in res['bbox']:
    #             print(each)
    #         print("耗时:" + str(res['time']) + ' ms')
    #         # cv2.imwrite(str(index + 1)+'.bmp',res['image_result'])
    #         cv2.imshow("result", res['image_result'])
    #         cv2.waitKey()
    #         # index +=1