最近用到的数据处理代码

1.修改文件名

# Rename every label file "<stem>_liver<anything>" to "<stem>.png".
import os

label_dir = "/home/ouyang/data/CholecSeg8K_liver/labels"
for txt in os.listdir(label_dir):
    # Skip names without the marker: otherwise an already-renamed "x.png"
    # would become "x.png.png" when the script is run a second time.
    if "_liver" not in txt:
        continue
    new_txt = txt.split("_liver")[0] + ".png"
    os.rename(os.path.join(label_dir, txt), os.path.join(label_dir, new_txt))

2.划分训练集和测试集

import os
import random
from shutil import copy2

# Source folders holding the full dataset.
before_image = "/home/ouyang/data/CholecSeg8K_liver/images"
before_label = "/home/ouyang/data/CholecSeg8K_liver/labels"

# Destination folders for the training split.
after_train_image = "/home/ouyang/data/train/images"
after_train_label = "/home/ouyang/data/train/labels"

# Destination folders for the test split.
after_test_image = "/home/ouyang/data/test/images"
after_test_label = "/home/ouyang/data/test/labels"

# copy2() treats a non-existent destination as a *file* name, so the target
# folders must exist before anything is copied into them.
for folder in (after_train_image, after_train_label,
               after_test_image, after_test_label):
    os.makedirs(folder, exist_ok=True)

# os.listdir() returns entries in arbitrary order; sort both listings so that
# image[i] and label[i] line up (assumes images and labels follow the same
# naming scheme -- TODO confirm for your dataset).
image = sorted(os.listdir(before_image))
label = sorted(os.listdir(before_label))

length = len(image)
if len(label) != length:
    raise ValueError(
        f"image/label count mismatch: {length} images vs {len(label)} labels"
    )

# Shuffled list of indices 0..length-1; it decides which pair goes where.
index_list = list(range(length))
random.shuffle(index_list)

for count, i in enumerate(index_list):
    # Full paths of the i-th image and its label.
    imageName = os.path.join(before_image, image[i])
    labelName = os.path.join(before_label, label[i])

    # The first 80% of the shuffled pairs go to train, the rest to test.
    if count < length * 0.8:
        copy2(imageName, after_train_image)
        copy2(labelName, after_train_label)
    else:
        copy2(imageName, after_test_image)
        copy2(labelName, after_test_label)

3.将一张image和一张label做成一个npz文件

def npz():
    """Pack each image and its label into one ``<index>.npz`` file.

    Every ``*.png`` matched by ``path`` is read as an RGB image; its label is
    read as a single-channel image from the same path with ``images`` replaced
    by ``labels``. Both arrays are saved under the keys ``image`` and
    ``label``, so a file can be loaded back with::

        data = np.load('<path2>/0.npz')
        image, label = data['image'], data['label']

    Raises:
        FileNotFoundError: if an image or its label cannot be read.
    """
    # Local imports keep this snippet runnable on its own.
    import glob
    import os

    import cv2
    import numpy as np

    # Glob pattern for the input images.
    path = '/home/ouyang/TransUNet-main/data/images/*.png'
    # Folder where the training npz files are written.
    path2 = '/home/ouyang/TransUNet-main/data/Synapse/train_npz/'
    os.makedirs(path2, exist_ok=True)

    # Sort so the index -> file mapping is the same on every run.
    for i, img_path in enumerate(sorted(glob.glob(path))):
        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f"cannot read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # flags=0 loads the label as a single-channel grayscale image.
        label_path = img_path.replace('images', 'labels')
        label = cv2.imread(label_path, flags=0)
        if label is None:
            raise FileNotFoundError(f"cannot read label: {label_path}")

        np.savez(path2 + str(i), image=image, label=label)
        print('------------', i)

    print('ok')
npz()

4.对图片进行resize,一般resize为224或512

import os
from PIL import Image
# Resize every file in `path` to width x height and save it as PNG, under the
# same file name, in `output_path`.
path = r"F:/fangkun/Task07_Pancreas_png_center_250_width_800/imagesTr_test_all/"
output_path = r"F:/fangkun/Task07_Pancreas_png_center_250_width_800/imagesTr_test_all_224/"

# Target size, set once instead of on every iteration.
width = 224
height = 224

os.makedirs(output_path, exist_ok=True)
for filename in os.listdir(path):
    print(filename)
    # The context manager closes the source file after resizing.
    with Image.open(os.path.join(path, filename)) as img:
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        resized = img.resize((width, height), Image.LANCZOS)
    resized.save(os.path.join(output_path, filename), "png")

你可能感兴趣的:(深度学习)