目录
1.修改detect.py为如下,输出左上角和右下角xy坐标:
2.利用detect.py程序中--save-txt生成txt
3.使用如下代码将txt转成xml,请根据个人情况修改:
4.利用labelImg标注软件核对生成的xml是否正确
# Visit the detections in reverse order; each row unpacks into the four box
# corner values, the confidence and the class index.
for *xyxy, conf, cls in reversed(det):
    if save_txt:  # append this detection to the per-image label file
        # The stock normalized-xywh label has been swapped for the raw corner
        # coordinates (x1, y1, x2, y2), optionally followed by the confidence.
        fields = (cls, *xyxy)
        if save_conf:
            fields = fields + (conf,)
        with open(f'{txt_path}.txt', 'a') as f:
            # One space-separated line per detection, every value rendered with %g.
            record = ' '.join('%g' % (value,) for value in fields)
            f.write(record + '\n')
def parse_opt():
    """Declare the detection command-line options and return the parsed values.

    Relies on ``argparse`` and ``ROOT`` being available at module level; neither
    is defined in this excerpt. The defaults for ``--weights``, ``--source``,
    ``--data`` and ``--imgsz`` are the values customised for this project.

    Returns:
        argparse.Namespace: the options parsed from ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'runs/train/exp2/weights/best.pt', help='model path(s)')
    parser.add_argument('--source', type=str, default=ROOT / 'test/', help='file/dir/URL/glob, 0 for webcam')
    parser.add_argument('--data', type=str, default=ROOT / 'data/VOC_safety.yaml', help='(optional) dataset.yaml path')
    # nargs='+' accepts one or more ints; the help text gives the order as h,w.
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[5184, 3888], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='show results')
    # --save-txt must be passed on the command line to produce the label files.
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    opt = parser.parse_args()
    # Hand the namespace back; without this the caller would receive None.
    return opt
import os
from PIL import Image
import cv2
import numpy as np
from tqdm import tqdm
# Pascal VOC annotation templates, filled in with %-style mapping substitution.
#
# out0: document header. Expects the keys 'folder', 'name', 'path', 'width'
#       and 'height'; depth is fixed at 3 and segmented at 0.
out0 = '''<annotation>
    <folder>%(folder)s</folder>
    <filename>%(name)s</filename>
    <path>%(path)s</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>%(width)d</width>
        <height>%(height)d</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
'''
# out1: one <object> element per box. Expects the keys 'class', 'xmin',
#       'ymin', 'xmax' and 'ymax'.
out1 = '''    <object>
        <name>%(class)s</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>%(xmin)d</xmin>
            <ymin>%(ymin)d</ymin>
            <xmax>%(xmax)d</xmax>
            <ymax>%(ymax)d</ymax>
        </bndbox>
    </object>
'''
# out2: closes the root element opened by out0.
out2 = '''</annotation>
'''
# Class index written by detect.py -> object name stored in the XML.
_CLASS_NAMES = {
    0: "person",
    1: "wcaqm",
    2: "aqmzc",
    3: "zyaq_aqmbt",
    4: "aqd",
}


def _parse_label_line(line):
    """Convert one 'cls xmin ymin xmax ymax [conf]' line into an out1 mapping.

    Returns None for a blank line. Raises ValueError for a line with too few
    fields or a class index missing from _CLASS_NAMES, so a box is never
    written under the wrong name.
    """
    fields = line.split()
    if not fields:
        return None
    if len(fields) < 5:
        raise ValueError(f"expected at least 5 fields, got {len(fields)}: {line!r}")
    # Go through float() so values such as '12.0' or '12.6' are accepted too.
    class_id = int(float(fields[0]))
    if class_id not in _CLASS_NAMES:
        raise ValueError(f"unknown class id {class_id} in line: {line!r}")
    xmin, ymin, xmax, ymax = (round(float(value)) for value in fields[1:5])
    # Any trailing field (the optional confidence) is ignored.
    return {
        'class': _CLASS_NAMES[class_id],
        'xmin': xmin,
        'ymin': ymin,
        'xmax': xmax,
        'ymax': ymax,
    }


def translate(fdir, lists, txt_dir_temp):
    """Convert txt label files into XML annotation files, one per image.

    For every path in ``lists`` ending in '.jpg', the image is read to obtain
    its width and height, the matching ``<image stem>.txt`` is looked up in
    ``txt_dir_temp``, and ``<image stem>.xml`` is written next to the image
    using the module-level templates ``out0``, ``out1`` and ``out2``.

    Args:
        fdir: image folder; its last path component is stored as 'folder'.
        lists: iterable of image paths.
        txt_dir_temp: folder containing the txt label files.

    Raises:
        ValueError: if a label line is malformed or names an unknown class.

    An image without a label file gets an annotation with no objects. An
    image that cannot be read is reported and skipped.
    """
    # Local import keeps this block self-contained.
    from xml.sax.saxutils import escape

    # normpath drops a trailing separator, which would make basename() empty.
    folder = os.path.basename(os.path.normpath(fdir))

    for jpg in tqdm(lists):
        if not jpg.endswith('.jpg'):
            continue

        image = cv2.imread(jpg)
        if image is None:  # cv2.imread signals failure by returning None
            print(f"skip unreadable image: {jpg}")
            continue
        h, w = image.shape[:2]

        imgfile = os.path.basename(jpg)  # image file name
        img_name_nojpg = os.path.splitext(imgfile)[0]  # file name without extension
        # Swap only the final extension; str.replace would touch every '.jpg'.
        xml_path = os.path.splitext(jpg)[0] + '.xml'
        txt_path = os.path.join(txt_dir_temp, img_name_nojpg + ".txt")

        source = {
            'name': escape(imgfile),
            'path': escape(jpg),
            'folder': escape(folder),
            'width': w,
            'height': h,
        }

        # Parse every label before opening the XML so that a bad line cannot
        # leave a half-written annotation behind.
        labels = []
        if os.path.isfile(txt_path):
            with open(txt_path) as source_file:
                for line in source_file:
                    label = _parse_label_line(line)
                    if label is not None:
                        labels.append(label)

        # UTF-8 is what an XML reader assumes when there is no declaration.
        with open(xml_path, 'w', encoding='utf-8') as fxml:
            fxml.write(out0 % source)
            fxml.writelines(out1 % label for label in labels)
            fxml.write(out2)
if __name__ == '__main__':
    # Folder containing the .jpg images to annotate.
    file_dir = 'E:/11/'
    # Folder containing the txt label files passed on to translate().
    txt_dir = 'E:/labels/'

    # Strip any trailing separator before joining so the image path does not
    # contain a doubled '/'. A forward slash is used for the join because
    # translate() receives these paths as plain strings.
    image_dir = file_dir.rstrip('/\\')
    lists = [
        f'{image_dir}/{name}'  # full image path
        for name in sorted(os.listdir(file_dir))
        if name.endswith('.jpg')
    ]

    translate(file_dir, lists, txt_dir)
    print('Done')