解决not well-formed (invalid token)BUG,xml标签转到txt标签,txt标签转到xml标签,滑动窗口切割图像并且同步标签

紧接上次,我的最终目的现在是滑动窗口切割图片并且同步标签。需要对xml文件进行读写。

python报错xml.etree.ElementTree.ParseError: not well-formed (invalid token): line 3, column 50

按照我的解决方式处理之后,最后我发现了一个问题:目标框好像被移动了。

所以最后我的做法是:先把xml文件转成txt标签,再根据txt文件新建xml文件,这样就没有错误啦!

xml标签 to yolov4txt标签

'''#这个文件为对ann_path_origin和img_path_origin选取特定类别的图片和xml
#生成的结果为一个图,一个txt文件,这个代码删除了只添加正样本(含有想要检测对象的样本),意味着尽管这张图片没有包含目标也会被写入txt
不会复制提取出来的图片
'''
import os
from tqdm import tqdm
import shutil
import xml.etree.ElementTree as ET
import xml

ann_path_origin = r'/home/xys/CloundShiProjects/traffic_light/trafficlight_dect/data/Annotations/'        # directory of the xml annotation files to convert; adjust as needed
img_path_origin = r'/home/xys/CloundShiProjects/traffic_light/trafficlight_dect/data/JPEGImages/'         # directory of the matching image files; adjust as needed
# img_savepath = r'/home/xys/CppProjects/darknet/xinding/obj/'         # (disabled) directory the images would be copied to; adjust as needed
txt_save_path = r'/home/xys/CloundShiProjects/traffic_light/trafficlight_dect/data/labels/'   # directory the txt label files are written to; adjust as needed
class_names_path=r'/home/xys/CloundShiProjects/traffic_light/trafficlight_dect/data/class.txt'     # path of the class-name list file

#-----------------------函数定义------------------------------------
def read_class_name(path):
    """Read the class names stored in a text file, one name per line.

    Args:
        path: Path of the class list file.

    Returns:
        A list with one entry per line of the file, in file order, each
        stripped of surrounding whitespace. Blank lines are kept as empty
        strings so that a name's list index still equals its line number.
    """
    # The context manager closes the handle even if reading fails.
    with open(path, 'r') as f:
        return [line.strip() for line in f]

def mkr(path):
    """Make `path` an existing, empty directory.

    If something already exists at `path`, the whole directory tree is
    removed first; afterwards a fresh directory is created with `os.mkdir`.
    """
    already_there = os.path.exists(path)
    if already_there:
        # Recursively delete the previous directory tree.
        shutil.rmtree(path)
    # Both cases end by creating the (now absent) directory.
    os.mkdir(path)

names = locals()  # NOTE(review): not referenced anywhere in the visible script
classes_name = read_class_name(class_names_path)  # class list; a class id is its index in this list
# mkr(img_savepath)                               # (disabled) would create the image output folder
mkr(txt_save_path)  # start from an empty label directory

count = 0
for xml_file_name in tqdm(os.listdir(ann_path_origin), ncols=150):
    file_name = xml_file_name[:-4]  # drop the 4-character extension (".xml")

    # errors='ignore' makes the text decoder drop undecodable bytes; the
    # surrounding article uses this to get past the
    # "not well-formed (invalid token)" ParseError.
    # An alternative the author left disabled was to catch ParseError and
    # move the offending xml/jpg pair into a separate folder.
    # The context manager closes the handle even when parsing raises.
    with open(ann_path_origin + xml_file_name, errors='ignore') as xml_file:
        tree = ET.parse(xml_file)
    root = tree.getroot()
    image_width = int(root.find('size').find('width').text)
    image_height = int(root.find('size').find('height').text)

    all_line = []
    for obj in root.iter('object'):
        difficult = obj.find('difficult').text  # "difficult" flag of this object
        cls_name = obj.find('name').text
        # Skip classes that are not in the class list and objects flagged difficult.
        if cls_name not in classes_name or int(difficult) == 1:
            continue

        # Pixel corners of the bounding box.
        xmlbox = obj.find('bndbox')
        xmin = int(float(xmlbox.find('xmin').text))
        ymin = int(float(xmlbox.find('ymin').text))
        xmax = int(float(xmlbox.find('xmax').text))
        ymax = int(float(xmlbox.find('ymax').text))

        # Convert corners to centre + width/height, normalised to 0-1 by the image size.
        cls_id = classes_name.index(cls_name)
        bb_h, bb_w = ymax - ymin, xmax - xmin
        if image_width == 0 or image_height == 0:
            continue  # cannot normalise against a zero-sized image
        coord_x = (xmin + bb_w / 2) / image_width
        coord_y = (ymin + bb_h / 2) / image_height
        a_line = f"{cls_id} {coord_x} {coord_y} {bb_w / image_width} {bb_h / image_height}\n"
        all_line.append(a_line)

    # A txt file is written for every xml file, even when no box was kept.
    with open(txt_save_path + file_name + ".txt", 'a') as txt_file:
        txt_file.writelines(all_line)
    # shutil.copy(img_path_origin+file_name+".jpg",img_savepath+file_name+".jpg")
    count += 1
print(f"---------转换了{count}个元素,结束-----------------")

 txt标签 to xml标签

from xml.dom.minidom import Document
import os
import cv2
from tqdm import tqdm

def _append_text_element(doc, parent, tag, text):
    """Create <tag>text</tag>, append it to `parent`, and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def makexml(txtPath, xmlPath, picPath, class_map=None):
    """Build one VOC-style xml annotation for every txt label file.

    Each line of a label file is read as
    ``<class id> <centre x> <centre y> <width> <height>`` with the four
    numbers relative to the image size. The image size itself is taken from
    the .jpg file with the same stem inside `picPath`.

    Args:
        txtPath: Directory of the txt label files (joined by plain string
            concatenation, so it must end with a path separator).
        xmlPath: Directory the xml files are written to (same convention).
        picPath: Directory of the images (same convention).
        class_map: Optional mapping from class-id string to class name.
            Defaults to the built-in traffic-light table below.

    Raises:
        FileNotFoundError: If the image belonging to a label file cannot be read.
        KeyError: If a label line uses a class id missing from the mapping.
    """
    if class_map is None:
        # Default conversion table from class id to class name.
        class_map = {'0': "hong-zhi",
                     '1': "hong-zuo",
                     '2': "hong-you",
                     '3': "lv-zhi",
                     '4': "lv-zuo",
                     '5': "lv-you",
                     '6': "huang-zhi",
                     '7': "huang-zuo",
                     '8': "huang-you",
                     '9': "hei",
                     '10': "feijidongchedeng"}

    for name in tqdm(os.listdir(txtPath)):
        stem = name[0:-4]  # file name without its 4-character extension
        with open(txtPath + name) as txt_file:
            label_lines = txt_file.readlines()

        img = cv2.imread(picPath + stem + ".jpg")
        if img is None:
            # cv2.imread signals failure with None instead of raising.
            raise FileNotFoundError(
                "cannot read image for label file %r: %s" % (name, picPath + stem + ".jpg"))
        Pheight, Pwidth, Pdepth = img.shape

        xmlBuilder = Document()
        annotation = xmlBuilder.createElement("annotation")
        xmlBuilder.appendChild(annotation)

        _append_text_element(xmlBuilder, annotation, "folder", "VOC2007")
        _append_text_element(xmlBuilder, annotation, "filename", stem + ".jpg")

        size = xmlBuilder.createElement("size")
        _append_text_element(xmlBuilder, size, "width", str(Pwidth))
        _append_text_element(xmlBuilder, size, "height", str(Pheight))
        _append_text_element(xmlBuilder, size, "depth", str(Pdepth))
        annotation.appendChild(size)

        for line in label_lines:
            fields = line.split()
            if not fields:
                continue  # a blank line carries no box
            cx, cy = float(fields[1]), float(fields[2])
            bw, bh = float(fields[3]), float(fields[4])

            obj_node = xmlBuilder.createElement("object")
            _append_text_element(xmlBuilder, obj_node, "name", class_map[fields[0]])
            _append_text_element(xmlBuilder, obj_node, "pose", "Unspecified")
            _append_text_element(xmlBuilder, obj_node, "truncated", "0")
            _append_text_element(xmlBuilder, obj_node, "difficult", "0")

            # Back from relative centre/size to pixel corners, truncated to int.
            # NOTE(review): the "+ 1" moves every corner by one pixel; presumably
            # it targets 1-based VOC coordinates — confirm it is wanted.
            bndbox = xmlBuilder.createElement("bndbox")
            _append_text_element(xmlBuilder, bndbox, "xmin",
                                 str(int((cx * Pwidth + 1) - bw * 0.5 * Pwidth)))
            _append_text_element(xmlBuilder, bndbox, "ymin",
                                 str(int((cy * Pheight + 1) - bh * 0.5 * Pheight)))
            _append_text_element(xmlBuilder, bndbox, "xmax",
                                 str(int((cx * Pwidth + 1) + bw * 0.5 * Pwidth)))
            _append_text_element(xmlBuilder, bndbox, "ymax",
                                 str(int((cy * Pheight + 1) + bh * 0.5 * Pheight)))
            obj_node.appendChild(bndbox)

            annotation.appendChild(obj_node)

        # Open the file as utf-8 so its bytes match the encoding declared in
        # the xml header written by writexml.
        with open(xmlPath + stem + ".xml", 'w', encoding='utf-8') as f:
            xmlBuilder.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='utf-8')
# Directories handed to makexml: images, txt labels (input) and xml annotations (output).
# makexml joins them with file names by plain concatenation, hence the trailing slashes.
jpg_path = '/home/xys/CloundShiProjects/traffic_light/trafficlight_dect/data/JPEGImages/'
txt_path = '/home/xys/CloundShiProjects/traffic_light/trafficlight_dect/data/labels/'
xml_path = '/home/xys/CloundShiProjects/traffic_light/trafficlight_dect/data/Annotations/'
# Runs the conversion as soon as this script is executed.
makexml(txt_path , xml_path, jpg_path)

然后就是滑动窗口切割同步标签

'''实现分辨率为w*h裁剪成多张a*a大小的图片(a<=w且a<=h),以大小为a/2的步长滑动,裁剪同时修改标注框。'''

# coding:utf-8
import cv2
import os
import codecs

import xml.dom.minidom as xmldom
import xml.etree.ElementTree as ET
import numpy as np
from PIL import Image
from tqdm import tqdm
import sys

# Raise the interpreter's recursion limit to 10000.
# NOTE(review): no recursive function is visible in this script — confirm this is still needed.
sys.setrecursionlimit(10000)


def load_image_into_numpy_array(image):
    """Turn an image object into a (height, width, 3) uint16 numpy array.

    `image` must expose `size` as a (width, height) pair and `getdata()`
    yielding the pixel values that fill the array.
    """
    im_width, im_height = image.size
    pixels = np.array(image.getdata())
    # Pixel data is reshaped to rows x columns x 3 channels.
    shaped = pixels.reshape((im_height, im_width, 3))
    return shaped.astype(np.uint16)
def voc_xml_parse(xml_path):
    """Parse an annotation xml and split it into header nodes and object nodes.

    Returns:
        (head, object_list): `head` maps 'folder', 'filename' and 'size' to
        the first element of that tag name in the document; `object_list`
        holds every <object> element. The source/owner/segmented elements
        are not collected.

    Raises:
        IndexError: If a folder, filename or size element is missing.
    """
    annotation_root = xmldom.parse(xml_path).documentElement

    def first(tag):
        # First element with this tag name anywhere below the root.
        return annotation_root.getElementsByTagName(tag)[0]

    head = {tag: first(tag) for tag in ('folder', 'filename', 'size')}
    return head, annotation_root.getElementsByTagName("object")


def voc_xml_modify(modify_xml_path, head, object_list):
    """Write header nodes and object nodes into a new annotation xml file.

    A fresh document with an <annotation> root is created; every value of
    `head` and then every node of `object_list` is appended to that root,
    and the document is written to `modify_xml_path` as utf-8.
    """
    document = xmldom.Document()
    annotation = document.createElement('annotation')
    document.appendChild(annotation)

    # Header nodes first, object nodes afterwards.
    for group in (head.values(), object_list):
        for node in group:
            annotation.appendChild(node)

    with open(modify_xml_path, 'w', encoding='utf-8') as out:
        document.writexml(out, addindent='\t', newl='\n', encoding='utf-8')


def crop_xml_modify(head, objectlist, hmin, wmin, new_height, new_width, origin_xml__path):
    """Rewrite annotation nodes so that they describe a single crop window.

    The <width>/<height> text inside head['size'] is set to the crop size.
    Every object whose bounding box lies completely inside the window
    [wmin, wmin + new_width] x [hmin, hmin + new_height] gets its
    coordinates shifted to the window origin (truncated to int); all other
    objects, including boxes that are only partly inside, are removed from
    `objectlist`. Other header nodes (e.g. <filename>) are left unchanged.

    Args:
        head: Mapping that holds the <size> element under the key 'size'.
        objectlist: Mutable list of <object> DOM elements; filtered in place.
        hmin: Top edge of the window in source-image pixels.
        wmin: Left edge of the window in source-image pixels.
        new_height: Window height in pixels.
        new_width: Window width in pixels.
        origin_xml__path: Unused; kept so existing callers keep working.

    Returns:
        (head, objectlist) — the same objects that were passed in.
    """
    size_node = head['size']
    size_node.getElementsByTagName('width')[0].childNodes[0].data = str(new_width)
    size_node.getElementsByTagName('height')[0].childNodes[0].data = str(new_height)

    wmax = wmin + new_width
    hmax = hmin + new_height

    kept = []
    for node in objectlist:
        bndbox = node.getElementsByTagName('bndbox')[0]
        # Text node of each corner coordinate, keyed by its tag name.
        coords = {tag: bndbox.getElementsByTagName(tag)[0].childNodes[0]
                  for tag in ('xmin', 'ymin', 'xmax', 'ymax')}
        x0 = float(coords['xmin'].data)
        y0 = float(coords['ymin'].data)
        x1 = float(coords['xmax'].data)
        y1 = float(coords['ymax'].data)
        if x0 >= wmin and x1 <= wmax and y0 >= hmin and y1 <= hmax:
            coords['xmin'].data = str(int(x0 - wmin))
            coords['xmax'].data = str(int(x1 - wmin))
            coords['ymin'].data = str(int(y0 - hmin))
            coords['ymax'].data = str(int(y1 - hmin))
            kept.append(node)

    # Slice assignment filters the caller's list in place, without the
    # index-rewinding needed when deleting while iterating.
    objectlist[:] = kept
    return head, objectlist


def _window_starts(length, window, stride):
    """Return the start offsets of the sliding windows along one axis.

    Offsets advance by `stride` while the window still fits; if the last
    regular window stops short of `length`, one extra window aligned to the
    end (`length - window`) is added. Requires length >= window, stride > 0.
    """
    starts = list(range(0, length - window + 1, stride))
    if starts[-1] + window < length:
        starts.append(length - window)
    return starts


def crop_dataset(imgpath, output_shape, annotation, cropAnno, cropImg, stride):
    """Cut one image into square sliding-window crops with matching xml files.

    Windows of output_shape x output_shape pixels are taken every `stride`
    pixels in x and y, plus one edge-aligned window per axis when the regular
    grid does not reach the border. For each window the annotation is
    re-parsed, restricted to the window by crop_xml_modify, and the pair
    `<cropImg>_<wmax>_<hmax>_<output_shape>.jpg` /
    `<cropAnno>_<wmax>_<hmax>_<output_shape>.xml` is written. Windows without
    any remaining object are only written occasionally (random sampling).

    Args:
        imgpath: Path of the source image.
        output_shape: Edge length of the square crops, in pixels.
        annotation: Path of the source xml annotation.
        cropAnno: Output path prefix for the cropped xml files.
        cropImg: Output path prefix for the cropped images.
        stride: Step between consecutive windows, in pixels (positive int).

    Raises:
        FileNotFoundError: If the image cannot be read.
        ValueError: If `stride` is not positive or the image is smaller than
            the requested crop in either dimension.
    """
    # Function-scope import: bound before any window is processed, so every
    # code path that samples empty crops can use it.
    import random

    # An empty crop is kept when random.randint(0, 100) is below this value.
    keep_empty_threshold = 3

    origin_image = cv2.imread(imgpath)
    if origin_image is None:
        # cv2.imread signals failure with None instead of raising.
        raise FileNotFoundError("cannot read image: %s" % imgpath)
    if stride <= 0:
        raise ValueError("stride must be positive, got %r" % (stride,))
    height, width = origin_image.shape[:2]
    if output_shape > width or output_shape > height:
        raise ValueError(
            "image %s is %dx%d, smaller than the %d-pixel crop window"
            % (imgpath, width, height, output_shape))

    for wmin in _window_starts(width, output_shape, stride):
        wmax = wmin + output_shape
        for hmin in _window_starts(height, output_shape, stride):
            hmax = hmin + output_shape

            # Re-parse for every window: crop_xml_modify edits the nodes in
            # place and voc_xml_modify moves them into a new document.
            head, objectlist = voc_xml_parse(annotation)
            modify_head, modify_objectlist = crop_xml_modify(
                head, objectlist, hmin, wmin, output_shape, output_shape, annotation)

            suffix = '_' + str(wmax) + '_' + str(hmax) + '_' + str(output_shape)
            if len(modify_objectlist) > 0 or random.randint(0, 100) < keep_empty_threshold:
                cv2.imwrite(cropImg + suffix + '.jpg', origin_image[hmin: hmax, wmin: wmax])
                voc_xml_modify(cropAnno + suffix + '.xml', modify_head, modify_objectlist)


if __name__ == '__main__':
    # output_shape is the edge length (pixels) of the square crops.
    output_shape = 512  # 512 1024
    stride = int(output_shape / 2)  # windows advance by half a crop
    imgpath = '/home/xys/CloundShiProjects/traffic_light/trafficlight_dect/data/jpg原图/'  # source images
    annotation = '/home/xys/CloundShiProjects/traffic_light/trafficlight_dect/data/xml去除无效字符/'  # xml annotations of the source images
    cropAnno = '/home/xys/CloundShiProjects/traffic_light/trafficlight_dect/data/Annotations/'  # output directory for cropped xml files
    cropImg = '/home/xys/CloundShiProjects/traffic_light/trafficlight_dect/data/JPEGImages/'  # output directory for cropped images
    # makedirs also creates missing parent directories and tolerates existing ones.
    os.makedirs(cropImg, exist_ok=True)
    os.makedirs(cropAnno, exist_ok=True)
    for each in tqdm(os.listdir(annotation)):
        # splitext keeps dots inside the stem ("a.b.xml" -> "a.b").
        name, ext = os.path.splitext(each)
        if ext.lower() != '.xml':
            continue  # only annotation files drive the cropping
        origin_img_path = os.path.join(imgpath, name + '.jpg')
        origin_xml__path = os.path.join(annotation, name + '.xml')
        crop_img_path = os.path.join(cropImg, name)
        crop_xml__path = os.path.join(cropAnno, name)
        crop_dataset(origin_img_path, output_shape, origin_xml__path, crop_xml__path, crop_img_path, stride)

你可能感兴趣的:(yolov4,xml,bug,python)