Install Anaconda3
PyTorch: 1.5.1
CUDA: 10.1
Python: 3.7
For the detailed environment setup, refer to the earlier article.
For an introduction to the dataset and its download, see this website.
Unless a source is noted, the code in this post comes from this blogger.
The first step is to convert the competition's JSON annotations into VOC-format XML files:
import os
import json
import codecs

# Map the Tianchi category ids to the Chinese class names.
class_name_dic = {
    "0": "背景",
    "1": "边异常",
    "2": "角异常",
    "3": "白色点瑕疵",
    "4": "浅色块瑕疵",
    "5": "深色点块瑕疵",
    "6": "光圈瑕疵"
}
rawImgDir = 'D:/Tianchi/tile_round1_train_20201231/train_imgs/'
rawLabelDir = 'D:/Tianchi/tile_round1_train_20201231/train_annos.json'
anno_dir = './voc/Annotations/'
if not os.path.exists(anno_dir):
    os.makedirs(anno_dir)

with open(rawLabelDir) as f:
    annos = json.load(f)

# Group annotation indices by image name.
image_ann = {}
for i in range(len(annos)):
    anno = annos[i]
    name = anno['name']
    if name not in image_ann:
        image_ann[name] = []
    image_ann[name].append(i)

# Write one VOC-style XML file per image.
for name in image_ann.keys():
    indexs = image_ann[name]
    height, width = annos[indexs[0]]["image_height"], annos[indexs[0]]["image_width"]

    with codecs.open(anno_dir + name[:-4] + '.xml', 'w', 'utf-8') as xml:
        xml.write('<annotation>\n')
        xml.write('\t<filename>' + name + '</filename>\n')
        xml.write('\t<size>\n')
        xml.write('\t\t<width>' + str(width) + '</width>\n')
        xml.write('\t\t<height>' + str(height) + '</height>\n')
        xml.write('\t\t<depth>' + str(3) + '</depth>\n')
        xml.write('\t</size>\n')
        cnt = 0
        for inx in indexs:
            obj = annos[inx]
            assert name == obj['name']
            bbox = obj['bbox']
            category = obj['category']
            xmin, ymin, xmax, ymax = bbox
            class_name = class_name_dic[str(category)]

            xml.write('\t<object>\n')
            xml.write('\t\t<name>' + class_name + '</name>\n')
            xml.write('\t\t<bndbox>\n')
            xml.write('\t\t\t<xmin>' + str(int(xmin)) + '</xmin>\n')
            xml.write('\t\t\t<ymin>' + str(int(ymin)) + '</ymin>\n')
            xml.write('\t\t\t<xmax>' + str(int(xmax)) + '</xmax>\n')
            xml.write('\t\t\t<ymax>' + str(int(ymax)) + '</ymax>\n')
            xml.write('\t\t</bndbox>\n')
            xml.write('\t</object>\n')
            cnt += 1
        assert cnt > 0
        xml.write('</annotation>')
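To confirm the conversion worked, a small spot check (not part of the original post) can parse one of the generated files back and print its fields:

# Hypothetical spot check: parse the first generated XML and print its content.
import xml.etree.ElementTree as ET

sample = os.path.join(anno_dir, os.listdir(anno_dir)[0])
root = ET.parse(sample).getroot()
print(root.find('filename').text)
for obj in root.iter('object'):
    box = [int(obj.find('bndbox').find(t).text) for t in ('xmin', 'ymin', 'xmax', 'ymax')]
    print(obj.find('name').text, box)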
# -*- coding: utf-8 -*-
import json
import cv2
from tqdm import tqdm
import os
import xml.etree.ElementTree as ET

def get(root, name):
    vars = root.findall(name)
    return vars

def get_and_check(root, name, length):
    vars = root.findall(name)
    if len(vars) == 0:
        raise NotImplementedError('Can not find %s in %s.' % (name, root.tag))
    if length > 0 and len(vars) != length:
        raise NotImplementedError('The size of %s is supposed to be %d, but is %d.' % (name, length, len(vars)))
    if length == 1:
        vars = vars[0]
    return vars

def deal_xml(xml_f):
    tree = ET.parse(xml_f)
    root = tree.getroot()
    object_list = []
    # Process every annotated bounding box.
    for obj in get(root, 'object'):
        # Read the class name of the box.
        category = get_and_check(obj, 'name', 1).text
        # Read the box coordinates.
        bndbox = get_and_check(obj, 'bndbox', 1)
        xmin = int(get_and_check(bndbox, 'xmin', 1).text) - 1
        ymin = int(get_and_check(bndbox, 'ymin', 1).text) - 1
        xmax = int(get_and_check(bndbox, 'xmax', 1).text)
        ymax = int(get_and_check(bndbox, 'ymax', 1).text)
        assert (xmax > xmin)
        assert (ymax > ymin)
        o_width = abs(xmax - xmin)
        o_height = abs(ymax - ymin)
        obj_info = [xmin, ymin, xmax, ymax, category]
        object_list.append(obj_info)
    return object_list

def draw_voc():
    ann_dir = './voc/Annotations'       # path of the VOC-format annotations
    image_dir = './voc/JPEGImages'      # path of the original images
    save_path = './voc/val_with_bbox/'  # where the drawn images are saved
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    for ann_name in tqdm(os.listdir(ann_dir)):
        ann_path = os.path.join(ann_dir, ann_name)
        object_list = deal_xml(ann_path)
        img = cv2.imread(os.path.join(image_dir, ann_name[:-4] + '.jpg'))
        for obj in object_list:
            x1, y1, x2, y2 = obj[0], obj[1], obj[2], obj[3]
            img = cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 8)
        cv2.imwrite(os.path.join(save_path, ann_name[:-4] + '.jpg'), img)

def draw_coco():
    ann_path = 'coco/annotations/instances_val2017.json'  # annotation json
    img_path = 'coco/val2017/'
    save_path = 'coco/val2017_with_bbox/'  # the path for saving images with annotated bboxes

    if not os.path.exists(save_path):
        os.makedirs(save_path)
    with open(ann_path, 'r') as f:
        ann = json.load(f)

    # for ann_img in tqdm(ann['images']):
    #     img = cv2.imread(img_path + ann_img['file_name'])
    #     img_id = ann_img['id']
    #     for ann_ann in ann['annotations']:
    #         if ann_ann['image_id'] == img_id:
    #             x1 = ann_ann['bbox'][0]
    #             y1 = ann_ann['bbox'][1]
    #             x2 = ann_ann['bbox'][0] + ann_ann['bbox'][2]
    #             y2 = ann_ann['bbox'][1] + ann_ann['bbox'][3]
    #             img = cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 8)
    #     cv2.imwrite(save_path + ann_img['file_name'], img)

    aug_anns = ann
    print("The augmentation image number: %d" % len(aug_anns['images']))
    print("The augmentation annotation number: %d" % len(aug_anns['annotations']))
    print("")
    class_freq_dict = {}
    # init class_freq_dict
    for cls in aug_anns['categories']:
        class_freq_dict[cls['id']] = 0
    # count the instance number of each class
    for per_ann in aug_anns['annotations']:
        class_freq_dict[per_ann['category_id']] += 1
    # print out class frequency
    print("The instance number of each class:")
    for cls_id in list(class_freq_dict.keys()):
        for cat in aug_anns['categories']:
            if cat['id'] == cls_id:
                print(cat['name'], ': ', class_freq_dict[cls_id])

if __name__ == "__main__":
    draw_voc()
The image with the defect boxes drawn is shown below.
To improve detection accuracy, the tile images are sliced into smaller crops, following the slicing code from this article.
The implementation is as follows (a sketch of the helper functions it relies on appears after the script):
import os
import random
import math
import cv2
import xml.etree.ElementTree as ET

# Create the output directories by hand.
# NOTE: these should match the output paths written to at the bottom of the loop.
os.makedirs('D:/Tianchi/voc/JPEGImages/', exist_ok=True)
os.makedirs('D:/Tianchi/voc/Annotations/', exist_ok=True)

# `image_meta`, `img_paths`, `each_img_meta`, `create_tree` and
# `create_object` come from the referenced article's code.
window_s = 1333
for idx, each_item in enumerate(image_meta):
    print(each_item)
    bbox = each_item['bbox']
    img = cv2.imread(img_paths + each_item['name'])
    h = img.shape[0]
    w = img.shape[1]
    # Center the crop window on the current defect box.
    center_x, center_y = int(bbox[0] + (bbox[2] - bbox[0]) / 2), int((bbox[3] - bbox[1]) / 2 + bbox[1])
    x, y, r, b = center_x - window_s // 2, center_y - window_s // 2, center_x + window_s // 2, center_y + window_s // 2
    # Jitter the window, then clip it to the image borders.
    x = x - random.randint(50, 100)
    y = y - random.randint(50, 100)
    x = max(0, x)
    y = max(0, y)
    r = min(r, w)
    b = min(b, h)
    # Keep every box that falls completely inside the window and shift its
    # coordinates into the crop's frame.
    boxes = each_img_meta[each_item['name']]
    annotations = []
    for e_box in list(boxes):  # iterate over a copy, since the list is mutated below
        if x < e_box[0] < r and y < e_box[1] < b and x < e_box[2] < r and y < e_box[3] < b:
            e_box1 = [int(i) for i in e_box]
            e_box1[0] = math.floor(e_box1[0] - x)
            e_box1[1] = math.floor(e_box1[1] - y)
            e_box1[2] = math.ceil(e_box1[2] - x)
            e_box1[3] = math.ceil(e_box1[3] - y)
            annotations.append(e_box1)
            each_img_meta[each_item['name']].remove(e_box)
    print('process id:', idx, "|", annotations)
    if annotations:
        slice_img = img[y:b, x:r]
        create_tree(each_item['name'], window_s, window_s)
        for anno in annotations:
            create_object(annotation, anno[0], anno[1], anno[2], anno[3], anno[4])
        tree = ET.ElementTree(annotation)
        slice_name = each_item['name'][:-4] + '_' + str(x) + '_' + str(y) + '.jpg'
        xml_name = each_item['name'][:-4] + '_' + str(x) + '_' + str(y) + '.xml'
        cv2.imwrite('D:/Tianchi/yolov5_material/yolov5-master/voc/JPEGImages/' + slice_name, slice_img)
        tree.write('D:/Tianchi/yolov5_material/yolov5-master/voc/Annotations/' + xml_name)
    else:
        continue
    if x < 0 or y < 0 or r > w or b > h:
        print(idx)
        print(each_item['name'])
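The script relies on create_tree and create_object from the referenced article. For completeness, here is a minimal, hypothetical sketch of compatible helpers; it assumes `annotation` is a module-level ElementTree element and that each box carries its category as a fifth element:

import xml.etree.ElementTree as ET

def create_tree(image_name, h, w):
    # Build the XML root for one sliced image; the slicing loop reads the
    # module-level `annotation` element afterwards.
    global annotation
    annotation = ET.Element('annotation')
    ET.SubElement(annotation, 'filename').text = image_name
    size = ET.SubElement(annotation, 'size')
    ET.SubElement(size, 'width').text = str(w)
    ET.SubElement(size, 'height').text = str(h)
    ET.SubElement(size, 'depth').text = '3'

def create_object(root, xmin, ymin, xmax, ymax, label):
    # Append one <object> entry with its class label and shifted box.
    obj = ET.SubElement(root, 'object')
    ET.SubElement(obj, 'name').text = str(label)
    bndbox = ET.SubElement(obj, 'bndbox')
    ET.SubElement(bndbox, 'xmin').text = str(xmin)
    ET.SubElement(bndbox, 'ymin').text = str(ymin)
    ET.SubElement(bndbox, 'xmax').text = str(xmax)
    ET.SubElement(bndbox, 'ymax').text = str(ymax)

Note that the VOC-to-YOLO converter later in this post matches the XML name field against the Chinese class names, so a numeric anno[4] would first need to be mapped through a dictionary such as class_name_dic.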
The yolov5 model requires YOLO-format data for object detection,
so the following program creates the YOLO-format labels.
'''
Annotation conversion from XML to txt (VOC to YOLO). After converting, a labels file is still needed, i.e. the mapping from numeric indices to label names.
'''
import xml.etree.ElementTree as ET
import os

classes = ['边异常', '角异常', '白色点瑕疵', '浅色块瑕疵', '深色点块瑕疵', '光圈瑕疵']

def convert(size, box):
    # size is (width, height); box is (xmin, xmax, ymin, ymax).
    dw = 1. / (size[0])
    dh = 1. / (size[1])
    x = (box[0] + box[1]) / 2.0 - 1
    y = (box[2] + box[3]) / 2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    # Clamp values that spill past the image border.
    if x >= 1:
        x = 0.99
    if y >= 1:
        y = 0.99
    if w >= 1:
        w = 0.99
    if h >= 1:
        h = 0.99
    return (x, y, w, h)

def convert_annotation(rootpath, xmlname):
    xmlpath = rootpath + '/Annotations'
    xmlfile = os.path.join(xmlpath, xmlname)
    with open(xmlfile, "r", encoding='UTF-8') as in_file:
        txtname = xmlname[:-4] + '.txt'
        print(txtname)
        txtpath = rootpath + '/worktxt'
        if not os.path.exists(txtpath):
            os.makedirs(txtpath)
        txtfile = os.path.join(txtpath, txtname)
        with open(txtfile, "w+", encoding='UTF-8') as out_file:
            tree = ET.parse(in_file)
            root = tree.getroot()
            size = root.find('size')
            w = int(size.find('width').text)
            h = int(size.find('height').text)
            out_file.truncate()
            for obj in root.iter('object'):
                # difficult = obj.find('difficult').text
                cls = obj.find('name').text
                # if cls not in classes or int(difficult) == 1:
                #     continue
                if cls not in classes:
                    continue
                cls_id = classes.index(cls)
                xmlbox = obj.find('bndbox')
                b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                     float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
                bb = convert((w, h), b)
                out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

if __name__ == "__main__":
    rootpath = './voc'
    xmlpath = rootpath + '/Annotations'
    xml_names = os.listdir(xmlpath)
    for i in range(0, len(xml_names)):
        path = os.path.join(xmlpath, xml_names[i])
        if ('.xml' in path) or ('.XML' in path):
            convert_annotation(rootpath, xml_names[i])
            print('done', i)
        else:
            print('not xml file', i)
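As a quick worked example (with hypothetical numbers): for a 2000x2000 image and a box spanning x 100..300 and y 500..900, convert() produces the normalized center and size that YOLO expects:

x, y, w, h = convert((2000, 2000), (100.0, 300.0, 500.0, 900.0))
# center x: (100 + 300) / 2 - 1 = 199 -> 199 / 2000 = 0.0995
# center y: (500 + 900) / 2 - 1 = 699 -> 699 / 2000 = 0.3495
# width: (300 - 100) / 2000 = 0.1;  height: (900 - 500) / 2000 = 0.2
print(x, y, w, h)  # approx. 0.0995 0.3495 0.1 0.2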
The generated YOLO-format data is shown in the figure below; the leading number 4 is the defect class index, and the numbers after it encode the defect's position.
import os
import shutil
import random

ratio = 0.1
img_dir = './voc/JPEGImages'   # image path
label_dir = './voc/worktxt'    # path of the generated YOLO-format labels
train_img_dir = './coco/images/train2017'    # where the training images go
val_img_dir = './coco/images/val2017'
train_label_dir = './coco/labels/train2017'  # where the training YOLO labels go
val_label_dir = './coco/labels/val2017'
if not os.path.exists(train_img_dir):
    os.makedirs(train_img_dir)
if not os.path.exists(val_img_dir):
    os.makedirs(val_img_dir)
if not os.path.exists(train_label_dir):
    os.makedirs(train_label_dir)
if not os.path.exists(val_label_dir):
    os.makedirs(val_label_dir)
names = os.listdir(img_dir)
# Hold out `ratio` of the images as the validation set.
val_names = random.sample(names, int(len(names) * ratio))
for name in names:
    if name in val_names:
        shutil.copy(os.path.join(img_dir, name), os.path.join(val_img_dir, name))
        shutil.copy(os.path.join(label_dir, name[:-4] + '.txt'), os.path.join(val_label_dir, name[:-4] + '.txt'))
    else:
        shutil.copy(os.path.join(img_dir, name), os.path.join(train_img_dir, name))
        shutil.copy(os.path.join(label_dir, name[:-4] + '.txt'), os.path.join(train_label_dir, name[:-4] + '.txt'))
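A small check (not in the original post) to confirm the split sizes after copying:

print(len(os.listdir(train_img_dir)), 'training images;',
      len(os.listdir(val_img_dir)), 'validation images')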
The environment setup and the training workflow can follow the earlier article.
Before training, update nc (the number of object classes) and names in coco128.yaml and yolov5s.yaml; a sketch of the relevant fields follows.
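A minimal sketch of the fields to change in coco128.yaml, assuming the directory layout produced by the split script above (in yolov5s.yaml only nc needs the same change):

train: ./coco/images/train2017
val: ./coco/images/val2017

nc: 6  # number of defect classes (background is not trained as a class)
names: ['边异常', '角异常', '白色点瑕疵', '浅色块瑕疵', '深色点块瑕疵', '光圈瑕疵']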
Enter the Yolov5 environment, switch to the corresponding directory, and train the model
(the earlier article gives the detailed steps):
python train.py --img 640 --data data/coco128.yaml --cfg models/yolov5s.yaml --weights weights/yolov5s.pt --batch-size 16 --epochs 30
import argparse
import json

import torch.backends.cudnn as cudnn
from utils import google_utils
from utils.datasets import *
from utils.utils import *

submit_result = []

def infer_one_slice(im0, cur_x, cur_y):
    # Letterbox the slice to the network input size.
    img = letterbox(im0, new_shape=opt.slice_size)[0]
    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3xHxW
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)
    # Inference
    pred = model(img, augment=False)[0]
    # Apply NMS
    pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres)
    boxes = []
    max_score = 0
    for i, det in enumerate(pred):  # detections per image
        if det is not None and len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
            # Shift the boxes back into full-image coordinates.
            for *xyxy, conf, cls in det:
                boxes.append([int(xyxy[0] + cur_x), int(xyxy[1] + cur_y),
                              int(xyxy[2] + cur_x), int(xyxy[3] + cur_y),
                              int(cls.item()) + 1, conf.item()])
                if conf.item() > max_score:
                    max_score = conf.item()

    # Only keep this slice's boxes if its best score clears 0.3.
    if max_score > 0.3:
        return boxes
    else:
        return []

def slice_im(image_path, sliceHeight=640, sliceWidth=640, overlap=0.01):
    result_pre = []
    image0 = cv2.imread(image_path, 1)  # color
    win_h, win_w = image0.shape[:2]

    n_ims = 0
    dx = int((1. - overlap) * sliceWidth)
    dy = int((1. - overlap) * sliceHeight)
    for y0 in range(0, image0.shape[0], dy):
        for x0 in range(0, image0.shape[1], dx):
            n_ims += 1
            # This step guarantees no slice smaller than the target size is
            # produced; in effect the overlap of the last slice is adjusted.
            # Example: h=6000, w=8192, sliced at 640 with overlap 0.2 gives
            # 0.2*640=128, so the stride is 512 and the top-left y0 values run
            # 0, 512, 1024, ..., 5120; the next is not 5632, since
            # 5632+640 > 6000, so y0 = 6000-640 instead.
            if y0 + sliceHeight > image0.shape[0]:
                y = image0.shape[0] - sliceHeight
            else:
                y = y0
            if x0 + sliceWidth > image0.shape[1]:
                x = image0.shape[1] - sliceWidth
            else:
                x = x0

            # extract image
            window_c = image0[y:y + sliceHeight, x:x + sliceWidth]
            # ------ run inference on this one slice ------
            slice_bbox = infer_one_slice(window_c, x, y)  # the detections for this slice
            if slice_bbox != []:
                result_pre += slice_bbox
    return result_pre

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='weights/best.pt', help='model.pt path')
    parser.add_argument('--source', type=str, default='D:/Tianchi/yolo/test', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--output', type=str, default='inference/output', help='output folder')  # output folder
    parser.add_argument('--slice_size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.05, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    opt = parser.parse_args()
    print(opt)

    # Initialize
    device = torch_utils.select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    google_utils.attempt_download(opt.weights)
    model = torch.load(opt.weights, map_location=device)['model'].float().eval()  # load FP32 model
    if half:
        model.half()  # to FP16

    for per_img_name in tqdm(os.listdir(opt.source)):
        image_path = os.path.join(opt.source, per_img_name)
        image_result_pre = slice_im(image_path, sliceHeight=opt.slice_size, sliceWidth=opt.slice_size)
        # image_result_pre: if the slices overlap, the merged results can be
        # passed through one more NMS (a sketch follows after this script).
        if image_result_pre:
            for per_pre in image_result_pre:
                submit_result.append(
                    {'name': per_img_name, 'category': per_pre[4], 'bbox': per_pre[:4], 'score': per_pre[5]})

    print(submit_result)
    if not os.path.exists('results/'):
        os.makedirs('results/')
    with open('results/result_post.json', 'w') as fp:
        json.dump(submit_result, fp, indent=4, ensure_ascii=False)
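As noted in the comment above, detections from overlapping slices can be merged with one more NMS pass. A minimal sketch, assuming torchvision is installed and using the [x1, y1, x2, y2, category, score] layout produced by slice_im:

import torch
import torchvision

def merge_slice_detections(boxes, iou_thres=0.5):
    # Class-agnostic NMS over all detections of one full image.
    if not boxes:
        return []
    xyxy = torch.tensor([b[:4] for b in boxes], dtype=torch.float32)
    scores = torch.tensor([b[5] for b in boxes], dtype=torch.float32)
    keep = torchvision.ops.nms(xyxy, scores, iou_thres)
    return [boxes[i] for i in keep]

# e.g. image_result_pre = merge_slice_detections(image_result_pre)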
Part of the detection results is shown below:
{
    "name": "198_23_t20201119103451643_CAM3.jpg",
    "category": 1,
    "bbox": [
        1266,
        1124,
        1358,
        1200
    ],
    "score": 0.05110248923301697
},
{
    "name": "198_23_t20201119103451643_CAM3.jpg",
    "category": 6,
    "bbox": [
        3227,
        1074,
        3341,
        1173
    ],
    "score": 0.3168776035308838
}