# Paddle: 将数据集划分并写入 txt 列表文件 (split a dataset into train/val/test .txt list files)

import fileinput
import glob
import os
import cv2
import random

#数据清洗
def generate_list(img_path, lab_path, train_list, strip_prefix='E:\\1\\'):
    """Write an "image label" pair list for every JPEG found in ``img_path``.

    Each ``*.jpg`` file directly inside ``img_path`` produces one output line
    of the form ``<image path> <label path>``.  The label path is the file
    with the same base name and a ``.png`` extension inside ``lab_path``.
    Backslashes in the written paths are converted to forward slashes.

    Args:
        img_path: Directory containing the ``.jpg`` images.
        lab_path: Directory expected to hold the matching ``.png`` labels.
            The label files are not checked for existence.
        train_list: Path of the text file to create or overwrite.
        strip_prefix: Substring removed from every output line before it is
            written.  Defaults to the Windows dataset root that used to be
            hard-coded here; pass ``""`` to keep the paths untouched.
    """
    # Sorted so the generated list is reproducible regardless of the order
    # in which the filesystem enumerates the directory.
    img_list = sorted(glob.glob(os.path.join(img_path, "*.jpg")))

    write_list = []
    for img_file in img_list:
        # Build the label path from the file name alone.  Substituting
        # img_path by lab_path inside the full path string would also
        # rewrite file names that happen to contain the directory name.
        stem = os.path.splitext(os.path.basename(img_file))[0]
        lab_file = os.path.join(lab_path, stem + ".png")
        write_list.append(img_file + " " + lab_file)

    with open(train_list, "w") as f:
        for entry in write_list:
            if strip_prefix:
                entry = entry.replace(strip_prefix, "")
            f.write(entry.replace('\\', '/') + '\n')

def random_selet(list_path=None, val_file=None, test_file=None,
                 val_list_rate=0.1, test_list_rate=0.2):
    """Randomly pick disjoint validation and test subsets from the full list.

    Reads every line of the full list, draws ``int(n * val_list_rate)``
    entries for validation and ``int(n * test_list_rate)`` entries for
    testing, and writes them to the validation and test files.  The full
    list itself is left unchanged.

    Args:
        list_path: File holding the full list.  Falls back to the
            module-level ``train_list`` when omitted.
        val_file: Output file for the validation subset.  Falls back to the
            module-level ``val_path`` when omitted.
        test_file: Output file for the test subset.  Falls back to the
            module-level ``test_path`` when omitted.
        val_list_rate: Fraction of entries moved to the validation subset
            (e.g. 0.1 picks 10 out of 100).
        test_list_rate: Fraction of entries moved to the test subset.

    Raises:
        ValueError: If the requested subsets are negative in size or together
            need more entries than the list contains.
    """
    if list_path is None:
        list_path = train_list
    if val_file is None:
        val_file = val_path
    if test_file is None:
        test_file = test_path

    with open(list_path, 'r') as src:
        # Guarantee a trailing newline on every entry so that two entries
        # can never be glued together when they are written back out.
        all_list = [line if line.endswith('\n') else line + '\n'
                    for line in src]

    filenumber = len(all_list)  # number of listed images
    picknumber1 = int(filenumber * val_list_rate)
    picknumber2 = int(filenumber * test_list_rate)
    if picknumber1 < 0 or picknumber2 < 0 \
            or picknumber1 + picknumber2 > filenumber:
        raise ValueError(
            "val_list_rate and test_list_rate must be non-negative and "
            "sum to at most 1"
        )

    # A single draw split in two keeps the subsets disjoint; two independent
    # draws could place the same entry in both validation and test.
    picked = random.sample(all_list, picknumber1 + picknumber2)
    sample1 = picked[:picknumber1]
    sample2 = picked[picknumber1:]

    with open(val_file, "w") as val:
        val.writelines(sample1)
    with open(test_file, 'w') as test:
        test.writelines(sample2)


def delete_vallist(list_path=None, val_file=None):
    """Remove from the training list the entries present in the validation list.

    For every line of the validation file, one matching line (the earliest
    remaining one) is dropped from the training file.  Validation lines that
    do not occur in the training file are ignored.  The training file is
    rewritten in place with the surviving lines in their original order.

    Args:
        list_path: Training list to filter.  Falls back to the module-level
            ``train_list`` when omitted.
        val_file: Validation list naming the entries to remove.  Falls back
            to the module-level ``val_path`` when omitted.
    """
    if list_path is None:
        list_path = train_list
    if val_file is None:
        val_file = val_path

    with open(list_path, 'r') as r:
        alllines = r.readlines()

    # Number of copies of each entry still to be removed.  Keys ignore the
    # trailing newline so a final line without one still matches.
    pending = {}
    with open(val_file, "r") as f:
        for dellist in f:
            key = dellist.rstrip('\n')
            pending[key] = pending.get(key, 0) + 1

    # Delete only lines that were actually found; an unconditional delete
    # would fail or drop an unrelated line when a validation entry is absent.
    kept = []
    for line in alllines:
        key = line.rstrip('\n')
        if pending.get(key, 0) > 0:
            pending[key] -= 1
        else:
            kept.append(line)

    with open(list_path, "w") as s:
        s.writelines(kept)
    print("removed %d validation entries, %d entries remain"
          % (len(alllines) - len(kept), len(kept)))


def delete_testlist(list_path=None, test_file=None):
    """Remove from the training list the entries present in the test list.

    For every line of the test file, one matching line (the earliest
    remaining one) is dropped from the training file.  Test lines that do
    not occur in the training file are ignored.  The training file is
    rewritten in place with the surviving lines in their original order.

    Args:
        list_path: Training list to filter.  Falls back to the module-level
            ``train_list`` when omitted.
        test_file: Test list naming the entries to remove.  Falls back to
            the module-level ``test_path`` when omitted.
    """
    if list_path is None:
        list_path = train_list
    if test_file is None:
        test_file = test_path

    with open(list_path, 'r') as p:
        alllines1 = p.readlines()

    # Number of copies of each entry still to be removed.  Keys ignore the
    # trailing newline so a final line without one still matches.
    pending = {}
    with open(test_file, "r") as t:
        for dellist1 in t:
            key = dellist1.rstrip('\n')
            pending[key] = pending.get(key, 0) + 1

    # Delete only lines that were actually found; an unconditional delete
    # would fail or drop an unrelated line when a test entry is absent.
    kept = []
    for line in alllines1:
        key = line.rstrip('\n')
        if pending.get(key, 0) > 0:
            pending[key] -= 1
        else:
            kept.append(line)

    with open(list_path, "w") as g:
        g.writelines(kept)
    print("removed %d test entries, %d entries remain"
          % (len(alllines1) - len(kept), len(kept)))






if __name__ == "__main__":
    # Dataset locations.  train_list, val_path and test_path must keep these
    # exact names: the split helpers below read them as module-level globals.
    img_path = r'E:\1\JPEGImages'      # image directory
    lab_path = r'E:\1\Annotations'     # label directory
    train_list = r'E:\1\train.txt'     # train.txt file
    val_path = r'E:\1\val.txt'         # val.txt file
    test_path = r'E:\1\test.txt'       # test.txt file

    # Build the full list first, then carve the val/test subsets out of it
    # and remove them from the training list, in that order.
    generate_list(img_path, lab_path, train_list)
    for step in (random_selet, delete_vallist, delete_testlist):
        step()

# 你可能感兴趣的:(小工具,1024程序员节)