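# Scans a multi-class dataset directory tree and reports file counts and file
# types, to help catch mistakes when building your own dataset. Adjust it
# slightly to match the actual directory layout.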
import glob
import os
import shutil
import sys
def get_all(path):
    """Return every filesystem path matching the glob pattern ``path``.

    The pattern may be relative or absolute. The whole result is built
    eagerly and handed back as a list, so ``len()`` can be taken on it.
    """
    matched_paths = glob.glob(path)
    return matched_paths
def get_iterator(path):
    """Return a lazy iterator over the paths matching the glob pattern ``path``.

    Unlike ``get_all`` the matches are produced one at a time, so the result
    can be looped over only once and has no length.
    """
    lazy_matches = glob.iglob(path)
    return lazy_matches
def main(controll, path=r"E:\Desktop\Zhou\data13\*"):
    """Scan a dataset tree and print per-directory statistics.

    The tree is walked four levels deep, each level discovered with a ``*``
    glob (so hidden dot-entries are skipped)::

        <top-level dir>/<class dir>/<split dir>/<entries>

    For every top-level directory matched by ``path`` the number of class
    directories is printed. For the top-level directories named ``sketch``
    and ``views`` the total number of entries found under the ``train`` and
    ``test`` split folders, plus the set of file extensions seen anywhere
    below that directory, are printed as well.

    Args:
        controll: Truthy to additionally print, for every class and split
            folder, the entry count and the extensions found there.
        path: Glob pattern selecting the top-level directories. Defaults to
            the location that used to be hard-coded in this function.

    Returns:
        None. All results are written to standard output.
    """

    def children_pattern(directory):
        # Escape the directory part so glob metacharacters inside a folder
        # name (e.g. "[1]") are matched literally; only the trailing "*"
        # acts as a wildcard. os.path.join keeps this portable.
        return os.path.join(glob.escape(directory), "*")

    for first_subpath in glob.iglob(path):  # e.g. the sketch and views dirs
        # basename works with whichever separator the platform uses, unlike
        # splitting on a literal backslash.
        first_sub_name = os.path.basename(first_subpath)
        print("\033[1;35;46m"+first_sub_name+"统计如下:\033[0m")

        class_num = 0
        # Counters are reset for every top-level directory, so one pair of
        # counters serves both the "sketch" and the "views" summary.
        split_counts = {"train": 0, "test": 0}
        total_type = set()

        for class_path in glob.iglob(children_pattern(first_subpath)):
            class_num += 1
            if controll:
                print(" "+os.path.basename(class_path)+":")

            # Each class directory is expected to hold train/test folders.
            for split_path in glob.iglob(children_pattern(class_path)):
                split_name = os.path.basename(split_path)
                all_files = glob.glob(children_pattern(split_path))
                # splitext only inspects the final path component, so a
                # dotted directory name or an extension-less file cannot
                # leak path fragments into the set; no extension yields "".
                per_class_type = {
                    os.path.splitext(file_path)[1][1:]
                    for file_path in all_files
                }
                total_type |= per_class_type
                if controll:
                    print(" "+split_name+":", end="")
                    print(len(all_files), end=" ")
                    print("包含的文件类型有:", per_class_type)
                if split_name in split_counts:
                    split_counts[split_name] += len(all_files)

        print("class_num===========================================================", class_num)
        if first_sub_name == "sketch":
            print("sketch_train_num====================================================", split_counts["train"])
            print("sketch_test_num=====================================================", split_counts["test"])
            print("包含的文件类型有====================================================", total_type)
        if first_sub_name == "views":
            print("views_train_num=====================================================", split_counts["train"])
            print("views_test_num======================================================", split_counts["test"])
            print("包含的文件类型有====================================================", total_type)
if __name__ == "__main__":
    # Detail switch: set to 1 to also print the per-class breakdown,
    # leave at 0 to print only the totals.
    controll = 0
    main(controll)