【分割数据集操作集锦】毕设记录

1. 按要求将CSV文件转成json文件

有些网络模型的源码会附带 data.json 这样的文件,里面存放了训练集和验证集的信息。这里我们根据 csv 格式的表格来生成这样的 json 文件。

以下代码有下述功能:

  1. 选出同时出现在csv文件和训练集文件夹中的同名文件,并按照自己的需要加入相应的列表。
  2. 将序号补全成文件名  fname='amos_'+str(id).zfill(4)+'.nii.gz'
  3. 根据json格式将1中列表补入字典

# Build the per-domain file lists: keep only the cases that are listed in the
# CSV AND are present in the training image folder.
import csv
import os

domainAB = []
domainC = []
imglist = os.listdir('/media/fsk/DATA1/AMOS22_total/imagesTr')
# Set copy of the folder listing so each membership test below is O(1)
# instead of a scan over the whole list.
available_images = set(imglist)

# The 'UTF-8-sig' codec drops a leading byte-order mark if the CSV has one.
with open('/home/fsk/monai/nnprocess/data_perpare/data/data1.csv', encoding='UTF-8-sig') as csvfile:
    reader = csv.DictReader(csvfile)
    for i, row in enumerate(reader):
        # Expand the case number into the file name, e.g. 1 -> amos_0001.nii.gz.
        fname = 'amos_' + str(row['id']).zfill(4) + '.nii.gz'
        if fname not in available_images:
            continue
        # The split is decided by the CSV row position (0-based, header row
        # not counted), not by the value of the 'id' column.
        if 0 <= i <= 249:
            domainAB.append(fname)
        elif 372 <= i <= 499:
            domainC.append(fname)

def _amos_metadata(num_training, num_test):
    """Return a fresh AMOS dataset-description dict with the given case counts.

    A new dict (including new nested "modality" and "labels" dicts) is built
    on every call, so the descriptions created below share no state.
    """
    return {
        "name": "AMOS",
        "description": "Amos: A large-scale abdominal multi-organ benchmark for versatile medical image segmentation",
        "author": "Yuanfeng Ji",
        "reference": "SRIDB x CUHKSZ x HKU x LGCHSZ x LGPHSZ",
        "licence": "CC-BY-SA 4.0",
        "release": "1.0 01/05/2022",
        "contact": "[email protected]",
        "tensorImageSize": "3D",
        "modality": {"0": "CT"},
        "labels": {
            "0": "background",
            "1": "spleen",
            "2": "gall bladder",
            "3": "esophagus",
            "4": "liver",
            "5": "stomach",
            "6": "arota",
            "7": "pancreas",
            "8": "right adrenal gland",
            "9": "left adrenal gland",
        },
        "numTraining": num_training,
        "numTest": num_test,
    }


# dataset: numTraining counts domainAB plus domainC; numTest counts domainC.
dataset = _amos_metadata(len(domainAB) + len(domainC), len(domainC))

# datasetDG: numTraining counts domainAB only; numTest counts domainC.
datasetDG = _amos_metadata(len(domainAB), len(domainC))
# Assemble the "training" / "test" entries and write both JSON descriptions.
import json
import os

# One image/label pair per domainAB case. The same entries are used by both
# descriptions: `training` starts out as a copy of this list.
trainingDG = [
    {"image": "./imagesTr/" + fname, "label": "./labelsTr/" + fname}
    for fname in domainAB
]
training = list(trainingDG)

# domainC cases are appended to `training` (but not to `trainingDG`) as
# image/label pairs, and their image paths alone form the shared `test` list.
test = []
for fname in domainC:
    img = "./imagesTr/" + fname
    training.append({"image": img, "label": "./labelsTr/" + fname})
    test.append(img)

dataset['training'] = training
datasetDG['training'] = trainingDG

dataset['test'] = test
datasetDG['test'] = test

# open(..., 'w') does not create missing directories, so make sure the
# output folder exists before writing into it.
os.makedirs('data', exist_ok=True)

with open('data/dataset.json', 'w') as fp:
    json.dump(dataset, fp)

with open('data/datasetDG.json', 'w') as fp:
    json.dump(datasetDG, fp)

2. 批量复制文件,将dir1中和dir2中交集文件全复制到dir3

# Copy every file whose name exists in BOTH dir1 and dir2 from dir1 into dir3.
import os
import shutil

# Source of the files, folder used as the name filter, and destination folder.
dir1 = "/media/fsk/DATA1/AMOS22_total/labelsTr"
dir2 = "/media/fsk/DATA1/nnunet/nnUNet_raw/nnUNet_raw_data/Task216_AMOS2022_task2_AB/inferTs"
dir3 = "/media/fsk/DATA1/nnunet/nnUNet_raw/nnUNet_raw_data/Task216_AMOS2022_task2_AB/labelsTs"

# If dir3 does not exist, shutil.copy treats it as the destination FILE name,
# and every matching file would be written over that one file. Create the
# directory up front so the copies land inside it.
os.makedirs(dir3, exist_ok=True)

for file in os.listdir(dir1):
    file_path = os.path.join(dir1, file)
    # Only regular files are copied; sub-directories of dir1 are skipped.
    if not os.path.isfile(file_path):
        continue
    # dir2 is only used as a filter: the data copied always comes from dir1.
    if os.path.exists(os.path.join(dir2, file)):
        # A file of the same name already present in dir3 is overwritten.
        shutil.copy(file_path, dir3)

3. 批量修改文件名

  1. 补全文件名,比如在末尾加_0000
  2. 将"img"改成"label"
# Batch-rename the files in folder_path by appending the "_0000" suffix,
# e.g. img0001.nii.gz -> img0001_0000.nii.gz.
import os

folder_path = "/media/fsk/DATA1/BTCV/imagesTr"
suffix = "_0000.nii.gz"
for file in os.listdir(folder_path):
    filepath = os.path.join(folder_path, file)
    # Skip sub-directories, and skip files that already carry the suffix so
    # that running the script a second time does not produce names such as
    # img0001_0000_0000.nii.gz.
    if not os.path.isfile(filepath) or file.endswith(suffix):
        continue
    # Everything after the first '.' is treated as the extension and replaced.
    newfile = file.split('.')[0] + suffix
    # Alternative rename, turning "img" into "label" instead:
    #   newfile = file.replace("img", "label")
    newpath = os.path.join(folder_path, newfile)
    os.rename(filepath, newpath)

4. 多个数据集标签统一

这边需要特别注意!!!!

在替换标签值的时候注意顺序,比如: 如果先将label=1的设为label=5 然后再将label=5的设为label=7,那么label=1和label=5的都会变成label=7。


#顺序
#"0": "background", "1": "spleen",  "2": "gall bladder", "3": "esophagus", "4": "liver", "5": "stomach", "6": "arota", "7": "pancreas", "8": "right adrenal gland", "9": "left adrenal gland"
# 导入nibabel包
import nibabel as nib 
import numpy as np
import os
import time

# Remap the label values of every mask in input_path and save the result,
# under the same file name, in output_path.
input_path = '/media/fsk/DATA1/AbdomenCT/Mask'
output_path = '/media/fsk/DATA1/AbdomenCT/new_Mask'

# old label value -> new label value. Values not listed are left unchanged.
LABEL_MAP = {
    6: 0,
    2: 0,
    12: 0,
    5: 6,
    7: 5,
    4: 7,
    1: 4,
    3: 1,
    8: 2,
    9: 3,
    10: 8,
    11: 9,
}

# Make sure the output folder exists before the first save.
os.makedirs(output_path, exist_ok=True)

labels = os.listdir(input_path)
for label in labels:
    amos_label = nib.load(os.path.join(input_path, label))
    # Voxel data as a floating-point array.
    amos_flabel = amos_label.get_fdata()
    # Label values present before remapping.
    print(label, np.unique(amos_flabel))

    # Each mask is computed on the untouched source array and applied to a
    # copy, so every voxel is remapped at most once and the result does not
    # depend on the order of the entries in LABEL_MAP.
    remapped = amos_flabel.copy()
    for old_value, new_value in LABEL_MAP.items():
        remapped[amos_flabel == old_value] = new_value

    new_amos_label = nib.Nifti1Image(remapped, amos_label.affine)
    nib.save(new_amos_label, os.path.join(output_path, label))

    # Re-read the file that was just written and print its label values, so
    # the check reflects what is on disk rather than the in-memory array.
    new_label = nib.load(os.path.join(output_path, label))
    new_flabel = new_label.get_fdata()
    print(label, np.unique(new_flabel))

5. 读取pkl数据

# Load a .pkl file and print its content and its length.
import pickle

path = '/media/fsk/DATA1/nnunet/nnUNet_preprocessed/Task216_AMOS2022_task1/nnUNetPlans_bfnnUNet_fabresnet_31_plans_3D.pkl'

# NOTE: unpickling can execute arbitrary code; only load .pkl files that come
# from a trusted source.
# The with-block closes the file handle even if unpickling raises.
with open(path, 'rb') as f:
    data = pickle.load(f)

print(data)
print(len(data))

你可能感兴趣的:(#,语义分割,json,python,分割)