从文件夹随机抽取文件到另外一个文件夹

参加 CVPR 2021 分类比赛时,用来处理长尾数据的小工具。
对每个类别进行不放回抽样,picknumber 为每个类别最多抽取的数量,避免头部类和尾部类的样本数差异过大。

import os, random, shutil

def rand_sampling(pathDir, n):
    """Draw up to ``n`` distinct entries from ``pathDir`` without replacement.

    The input list is left untouched, so callers can still inspect it
    (for example to report its length) after sampling.

    :param pathDir: candidate items to draw from (file names in this script)
    :param n: number of items to draw; clamped to ``len(pathDir)``, and
        values of zero or less yield an empty list
    :return: a new list holding the drawn items in random order
    """
    candidates = list(pathDir)  # work on a copy; never mutate the caller's list
    count = min(n, len(candidates))
    if count <= 0:
        return []
    return random.sample(candidates, count)

def moveFile(source, aim, picknumber=300):
    """Copy at most ``picknumber`` randomly chosen files from ``source`` to ``aim``.

    Despite its name, this function copies the files; the originals stay in
    ``source``. Sub-directories inside ``source`` are ignored.

    :param source: directory holding the files of one class
    :param aim: existing directory that receives the copies
    :param picknumber: maximum number of files to copy; when ``source``
        holds no more than this many files, all of them are copied
    :return: None
    """
    # Keep regular files only: shutil.copyfile cannot copy a directory.
    pathDir = [
        name for name in os.listdir(source)
        if os.path.isfile(os.path.join(source, name))
    ]
    # Record the size before sampling so the report reflects the real count.
    total = len(pathDir)
    if total <= picknumber:
        sample = pathDir
    else:
        sample = rand_sampling(pathDir, picknumber)
    print(source)
    print("len:", total)
    for name in sample:
        # os.path.join works whether or not the paths end with a separator.
        shutil.copyfile(os.path.join(source, name), os.path.join(aim, name))
    return

if __name__ == '__main__':
    fileDir = "/home1/zy/train_zy/train/"    # source root: one sub-folder per class
    tarDir = '/home1/zy/train_zy/train_ib_60/'    # destination root for the sampled copies
    for _root, dirs, _files in os.walk(fileDir):
        for d in dirs:
            # Keep the trailing separator so the paths work whether the
            # callee joins them or concatenates the file name directly.
            source = os.path.join(fileDir, d) + '/'
            aim = os.path.join(tarDir, d) + '/'
            # Create the per-class destination folder on first use.
            os.makedirs(aim, exist_ok=True)
            moveFile(source, aim)
        # Class folders are the direct children of fileDir; deeper levels
        # would be joined onto fileDir incorrectly, so stop after the first.
        break

你可能感兴趣的:(python,utils,python)