Python 数据集处理小工具合集

文件重命名

import glob
import shutil
import os

old_dir = "./label"


def rename_to_txt(directory):
    """Rename every entry directly inside *directory* to ``<stem>.txt``.

    The stem is the part of the entry name before its FIRST dot, so a
    multi-suffix name such as ``0001.jpg.txt`` becomes ``0001.txt``.
    Entries are renamed in place; nothing is written outside *directory*.

    Args:
        directory: Folder whose direct children are renamed.

    Returns:
        The resulting paths, in the (sorted) order they were processed.

    Raises:
        FileExistsError: If the new name is already taken by another entry,
            so that no file is silently overwritten.
    """
    renamed = []
    # os.path.join yields "<directory>/*". Concatenating "./*" onto the folder
    # name produced "<directory>./*", which matches nothing on POSIX systems.
    for path in sorted(glob.glob(os.path.join(directory, "*"))):
        stem = os.path.basename(path).split(".")[0]
        target = os.path.join(directory, stem + ".txt")
        if target != path:
            if os.path.exists(target):
                raise FileExistsError(target)
            # Rename inside the folder rather than bouncing the file through
            # the current working directory and moving it back.
            os.rename(path, target)
        renamed.append(target)
        print("ok")
    return renamed


rename_to_txt(old_dir)

移动文件

import glob
import shutil
import os

filePath = "./data/anno"
out_path = './data/dark_anno_train'


def flatten_class_dirs(src_root, dst_dir):
    """Move the contents of every sub-folder of *src_root* into *dst_dir*.

    The per-class folder level is dropped: an entry found at
    ``<src_root>/<class>/<name>`` ends up at ``<dst_dir>/<name>``.

    Args:
        src_root: Folder containing one sub-folder per class.
        dst_dir: Flat destination folder; created (with parents) if missing.

    Returns:
        The destination paths reported by ``shutil.move``.

    Raises:
        shutil.Error: Propagated from ``shutil.move`` when *dst_dir* already
            holds an entry with the same name.
    """
    # makedirs also creates missing parents and is a no-op when the folder
    # exists, so the destination only has to be prepared once.
    os.makedirs(dst_dir, exist_ok=True)
    moved = []
    for class_name in sorted(os.listdir(src_root)):
        class_dir = os.path.join(src_root, class_name)
        if not os.path.isdir(class_dir):
            # Loose files next to the class folders have nothing to flatten.
            continue
        entries = sorted(glob.glob(os.path.join(class_dir, "*")))
        print(entries)
        for entry in entries:
            moved.append(shutil.move(entry, dst_dir))
    return moved


if os.path.isdir(filePath):
    flatten_class_dirs(filePath, out_path)
else:
    print("source folder not found:", filePath)

文件按条件筛选

from PIL import Image
import glob
import numpy as np
import shutil
import os

filePath = "./data/dark"
out_path ='./data/out'
# makedirs creates missing parent folders and tolerates an existing target.
os.makedirs(out_path, exist_ok=True)

# Walk every class sub-folder of filePath and move the images that are not
# plain RGB, or that are wider/taller than 2000 px, into the matching
# sub-folder of out_path. Images that pass the check stay where they are.
for class_name in os.listdir(filePath):
    class_files = glob.glob(filePath+"/"+class_name+"/*")
    move_dir = os.path.join(out_path, class_name)
    os.makedirs(move_dir, exist_ok=True)
    print(class_files)
    for input_file in class_files:
        # Collect everything the check needs inside the `with` block so the
        # image file is closed again before it is moved; moving a file that
        # is still open fails on Windows and leaks a handle elsewhere.
        with Image.open(input_file) as image1:
            ndim = np.array(image1).ndim
            size = image1.size
            mode = image1.mode

        if ndim != 3 or size[0] > 2000 or size[1] > 2000 or mode != "RGB":
            print(ndim)
            print(size)
            shutil.move(input_file, move_dir)

文件夹切分

import os
import shutil
 
def mv_file(img, num, class_name):
    """Copy the files of one folder into numbered chunk folders.

    The entries of *img* are taken in sorted name order and split into
    consecutive groups of at most *num*. Group ``n`` (counting from 1) is
    copied to ``<grandparent of img>/temp/<class_name>_<n>``. The files in
    *img* itself are left untouched.

    Args:
        img: Folder whose files are split up.
        num: Maximum number of files per chunk folder; must be positive.
        class_name: Prefix used for the chunk folders below ``temp``.

    Raises:
        ValueError: If *num* is not positive.
        SystemExit: If *num* exceeds the number of entries in *img*, or if a
            folder that would be created already exists (a message is
            printed first in both cases).
    """
    if num <= 0:
        raise ValueError("num must be a positive integer")
    # Sorted so that the chunk contents are reproducible between runs.
    names = sorted(os.listdir(img))
    if num > len(names):
        print('长度需小于:', len(names))
        raise SystemExit
    # Ceiling division: an exact multiple of num must not produce an extra,
    # empty chunk folder at the end.
    chunk_count = (len(names) + num - 1) // num
    # Joined as a path so that a relative img such as "dark/Cat" gives the
    # relative "temp" folder instead of the absolute "/temp".
    temp_root = os.path.join(os.path.dirname(os.path.dirname(img)), "temp")
    for n in range(1, chunk_count + 1):  # create the folders for chunk n
        new_file = img + '_' + str(n)
        move_file = os.path.join(temp_root, class_name + '_' + str(n))
        # Check the exact paths that are about to be created.
        for target in (new_file, move_file):
            if os.path.exists(target):
                print('该路径已存在,请解决冲突', target)
                raise SystemExit
        print('创建文件夹:', new_file)
        print('创建文件夹:', move_file)
        # NOTE(review): "<img>_<n>" only ever served as a staging folder and
        # always ended up empty; it is still created so that the resulting
        # directory layout stays the same. Confirm whether it is needed.
        os.mkdir(new_file)
        # makedirs, because the shared "temp" parent may not exist yet.
        os.makedirs(move_file)
        for m in names[num * (n - 1):num * n]:
            shutil.copy(os.path.join(img, m), os.path.join(move_file, m))
    print('============task OK!===========')




if __name__ == "__main__":
    filePath = "./data/dark"
    # os.listdir returns a list built up front, so the "<class>_<n>" folders
    # that mv_file creates inside filePath are not visited by this loop.
    for class_name in os.listdir(filePath):
        class_path = os.path.join(filePath, class_name)
        if not os.path.isdir(class_path):
            # Stray files (e.g. ".DS_Store") cannot be listed by mv_file.
            continue
        print(class_name)
        print(class_path)
        mv_file(class_path, 10, class_name)  # folder to split, files per chunk folder
    
    

目标检测 数据集转换


from PIL import Image
import glob
import numpy as np
import shutil
import os

# Folder names used by the conversion below.
anno_dir = "./data/dark_anno_train"
img_dir = "./data/darktrain"

# Every entry found directly inside img_dir.
file_list = glob.glob("{}/*".format(img_dir))


# Known class names; the position of a name in this tuple is its numeric id.
_CLASS_NAMES = (
    'Bicycle', 'Boat', 'Bottle', 'Bus', 'Car', 'Cat',
    'Chair', 'Cup', 'Dog', 'Motorbike', 'People', 'Table',
)
_CLASS_IDS = {name: idx for idx, name in enumerate(_CLASS_NAMES)}


def str2num(s):
    """Return the integer class id for the class name *s*.

    Raises:
        KeyError: If *s* is not one of the known class names.
    """
    return _CLASS_IDS[s]

def _convert_line(line, img_w, img_h):
    """Convert one annotation line to "<id> <x> <y> <w> <h>" plus a newline.

    Field 0 of *line* is a class name, fields 1-2 are the box origin and
    fields 3-4 its width and height, all in pixels; further fields are
    ignored. The result holds the class id, the box centre and the box size,
    each coordinate divided by the image width / height.
    """
    fields = line.split()
    box_w = float(fields[3])
    box_h = float(fields[4])
    x = (float(fields[1]) + box_w / 2) / img_w
    y = (float(fields[2]) + box_h / 2) / img_h
    values = (str2num(fields[0]), x, y, box_w / img_w, box_h / img_h)
    return ' '.join(str(v) for v in values) + '\n'


# Rewrite the annotation file of every image in place.
# NOTE: the files are overwritten, so a second run over the same folder fails
# in str2num, because the converted lines start with a number, not a name.
for file in file_list:
    # Only the size is needed; the `with` block closes the image file again
    # instead of leaving one open handle per image.
    with Image.open(file) as img:
        w, h = img.size

    # The annotation file is named "<image file name>.txt".
    anno = os.path.join(anno_dir, os.path.basename(file) + ".txt")

    with open(anno, "r") as f:
        converted = [
            _convert_line(line, w, h)
            for line in f
            # Skip '%' header lines, and blank lines that used to raise
            # IndexError when indexing the empty field list.
            if line.strip() and not line.startswith('%')
        ]

    with open(anno, 'w') as f:
        f.writelines(converted)





你可能感兴趣的:(python数据集处理小工具合集)