经常需要读取某个文件夹下所有的图像文件。
我用 Python 写了一段简单的代码:读取某个文件夹下指定后缀的文件,并把文件名写入文本文件(CSV 格式)。
import fnmatch
import os
import pandas as pd
import numpy as np
import sys
# Command-line inputs: first the directory to scan, then the filename pattern.
InputStra, InputStrb = sys.argv[1], sys.argv[2]
def ReadSaveAddr(Stra, Strb, out_path='Get.lst'):
    """Write the names of the files in one directory that match a pattern.

    Only ``Stra`` itself is scanned; subdirectories are not entered.

    Args:
        Stra: Directory to scan.
        Strb: Shell-style wildcard pattern as understood by ``fnmatch``,
            for example ``*.png``.
        out_path: File the names are written to, one per line, without a
            header row or index column. Defaults to ``Get.lst``.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``Stra`` cannot be
            listed.
    """
    print("Read :", Stra, Strb)
    # Keep regular files only, so a subdirectory whose name happens to match
    # the pattern is not listed. Sorted because os.listdir returns entries in
    # arbitrary order and the output should be reproducible.
    a_list = sorted(
        name
        for name in fnmatch.filter(os.listdir(Stra), Strb)
        if os.path.isfile(os.path.join(Stra, name))
    )
    print("Find = ", len(a_list))
    # Build the column straight from the names; no placeholder frame needed.
    df = pd.DataFrame({'Addr': a_list})
    df.to_csv(out_path, columns=['Addr'], index=False, header=False)
    print("Write To", out_path, "!")
# Script entry: list the files matching the command-line pattern.
ReadSaveAddr(Stra=InputStra, Strb=InputStrb)
上面代码保存为:GetLst.py
使用时:
在cmd窗口输入:
python GetLst.py F:/train/pos *.png
上面的代码不会进入子目录,于是又做了一点修改,使其递归遍历所有下级目录:
def ReadSaveAddr2(Stra, Strb, out_path='Get.lst'):
    """Write the paths of all files under a directory tree that match a pattern.

    Unlike a single-directory listing, this walks ``Stra`` recursively with
    ``os.walk``. Each written path is the containing directory joined with
    the file name, so the paths are absolute only when ``Stra`` is absolute.

    Args:
        Stra: Root directory of the tree to walk.
        Strb: Shell-style wildcard pattern as understood by ``fnmatch``,
            for example ``*.png``. It is matched against file names only,
            not against directory names.
        out_path: File the paths are written to, one per line, without a
            header row or index column. Defaults to ``Get.lst``.
    """
    paths = []
    for dirpath, dirnames, filenames in os.walk(Stra):
        # Sorting in place makes os.walk descend in a reproducible order.
        dirnames.sort()
        # os.walk already lists each directory's files, so a second
        # os.listdir call is unnecessary and would also return directories.
        paths.extend(
            os.path.join(dirpath, name)
            for name in sorted(fnmatch.filter(filenames, Strb))
        )
    print("Find = ", len(paths))
    # Build the frame once at the end instead of concatenating per directory.
    df = pd.DataFrame({'Addr': paths})
    df.to_csv(out_path, columns=['Addr'], index=False, header=False)
    print("Write To", out_path, "!")