数据集处理之python生成.lst文件

# -*- coding: utf-8 -*-
import fnmatch
import os
import pandas as pd
import numpy as np

def mergeFile():
    """Pair the lines of ``2.lst`` and ``1.lst`` and write them to ``trian.lst``.

    Output line *i* is ``"<line i of 2.lst> <line i of 1.lst>"``. In both
    inputs newline characters are removed and backslashes are replaced with
    forward slashes; in the ``1.lst`` entries the misspelling ``goundTruth``
    is additionally rewritten to ``groundTruth``. All three files are
    resolved relative to the current working directory.

    Extra trailing lines in ``1.lst`` are ignored.

    Raises:
        IndexError: if ``1.lst`` has fewer lines than ``2.lst``.
    """
    # Context managers guarantee both handles are closed even when reading
    # or the later processing raises.
    with open("2.lst", "r", encoding='UTF-8') as file1, \
            open("1.lst", "r", encoding='UTF-8') as file2:
        file_list1 = file1.readlines()
        file_list2 = file2.readlines()

    # Fail early with an explicit message instead of an opaque index error
    # in the middle of the pairing loop.
    if len(file_list2) < len(file_list1):
        raise IndexError(
            "1.lst has %d lines but 2.lst has %d; every line of 2.lst needs "
            "a partner" % (len(file_list2), len(file_list1))
        )

    file_list = []
    for a, b in zip(file_list1, file_list2):
        a = a.replace('\n', '').replace('\\', '/')
        b = b.replace('\n', '').replace('\\', '/').replace('goundTruth', 'groundTruth')
        file_list.append(a + ' ' + b)

    df = pd.DataFrame(file_list, columns=['one'])
    # NOTE(review): 'trian.lst' looks like a typo for 'train.lst'; the name is
    # kept unchanged because other tooling may already read this file.
    df.to_csv('trian.lst', columns=['one'], index=False, header=False)


def ReadSaveAddr(Stra,Strb):
    """Walk ``Stra`` recursively and list every file, without extension, in ``2.lst``.

    Each output line is ``<directory>/<file name without extension>``, where
    ``<directory>`` is the walked directory path with the hard-coded prefix
    ``D:/qq_file/2275316862/FileRecv/`` removed when it occurs. The list is
    written to ``2.lst`` in the current working directory with no header
    and no index column.

    Args:
        Stra: root directory to scan.
        Strb: filename pattern (e.g. ``"*.png"``). Accepted for interface
            compatibility but currently unused: no filtering is applied and
            every file found is listed.
    """
    # Same console output as before: an empty frame first, then the running
    # (rows, columns) shape after each non-empty directory.
    print(pd.DataFrame(columns=['Addr']))

    addrs = []
    # Walk the directory passed in by the caller rather than relying on a
    # module-level global that only exists when run as a script.
    for dirpath, _dirnames, filenames in os.walk(Stra):
        if not filenames:
            continue
        prefix = dirpath.replace('D:/qq_file/2275316862/FileRecv/', '') + '/'
        # splitext drops the real extension whatever its length, instead of
        # blindly cutting the last four characters.
        addrs.extend(prefix + os.path.splitext(name)[0] for name in filenames)
        print((len(addrs), 1))

    # Build the frame once; concatenating per directory is quadratic.
    df = pd.DataFrame(addrs, columns=['Addr'])
    # '2.lst' is the output file name; change it here if needed.
    df.to_csv('2.lst', columns=['Addr'], index=False, header=False)
    # NOTE(review): the message names "Get.lst" although the file written is
    # "2.lst"; text kept unchanged.
    print("Write To Get.lst !")



if __name__ == '__main__':
    # Script entry point: list the files under the directory below.
    # Alternative data root (training set), kept for reference:
    # InputStra="D:/qq_file/2275316862/FileRecv/data/data/train/trainingset"
    InputStra = "D:/qq_file/2275316862/FileRecv/test"  # directory to scan
    InputStrb = "*.png"  # filename pattern passed as the second argument
    ReadSaveAddr(Stra=InputStra, Strb=InputStrb)
    # Pairing step, disabled by default:
    # mergeFile()

 

你可能感兴趣的:(数据集处理之python生成.lst文件)