PyTorch Dataset Loading (LiTS - Liver Tumor Segmentation Challenge Dataset)


My dataset looks like this:
xx.png and xx_mask.png are the corresponding image to be segmented and its ground-truth mask.
[screenshot: dataset directory]

Reading the dataset

The dataset object is abstracted as the Dataset class; a custom dataset must subclass Dataset.

__getitem__: returns one piece of data, i.e. one sample. obj[index] is equivalent to obj.__getitem__(index).

__len__: returns the number of samples. len(obj) is equivalent to obj.__len__().
files = os.listdir(dirpath) lists the files in the directory
img.append([imgPath, maskPath]) builds a list of image-path / label-path pairs
img_x = Image.open(xPath, 'r') reads an image
img_x = self.transforms(img_x) applies the dataset transforms

from torch.utils.data import Dataset
import os
import numpy as np
from PIL import Image

class MyDataset(Dataset):
    def __init__(self, dirpath, transforms=None, target_transforms=None):
        files = os.listdir(dirpath)
        len_files = len(files) // 2
        img = []
        for i in range(len_files):
            imgPath = os.path.join(dirpath, "%03d.png" % i)
            maskPath = os.path.join(dirpath, "%03d_mask.png" % i)
            img.append([imgPath, maskPath])
        self.imgs = img
        # self.imgs is a list; each element holds a pair of paths: [image path, label (mask) path]
        self.transforms = transforms
        self.target_transforms = target_transforms
        # the transforms can implement mean subtraction, division by the std,
        # random crop, rotation, flip, affine transform, and so on
    def __getitem__(self, index):
        xPath, yPath = self.imgs[index]
        img_x = Image.open(xPath, 'r')
        img_y = Image.open(yPath, 'r')
        # Image.open reads the image
        if self.transforms is not None:
            img_x = self.transforms(img_x)
        if self.target_transforms is not None:
            img_y = self.target_transforms(img_y)
        return img_x, img_y
    def __len__(self):
        return len(self.imgs)

a = MyDataset(".\\data\\train")
n = np.array(a[1][0])
print(n.shape)

The output is shown below:
[screenshot of the printed output]
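
With the Dataset working, it can be wrapped in a DataLoader for batched iteration. Here is a minimal sketch; the batch size and the ToTensor transforms are my own illustrative choices, not something taken from the snippet above:

import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# ToTensor turns the PIL image into a CHW float tensor scaled to [0, 1]
dataset = MyDataset(".\\data\\train",
                    transforms=transforms.ToTensor(),
                    target_transforms=transforms.ToTensor())
loader = DataLoader(dataset, batch_size=4, shuffle=True)

for img_batch, mask_batch in loader:
    print(img_batch.shape, mask_batch.shape)  # e.g. torch.Size([4, 3, 512, 512])
    break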

Computing the mean and variance of the dataset

(all images are 512*512)

from torch.utils.data import Dataset
import os
import numpy as np
from PIL import Image

class MyDataset(Dataset):
    def __init__(self, dirpath, transforms=None, target_transforms=None):
        files = os.listdir(dirpath)
        len_files = len(files) // 2
        img = []
        for i in range(len_files):
            imgPath = os.path.join(dirpath, "%03d.png" % i)
            maskPath = os.path.join(dirpath, "%03d_mask.png" % i)
            img.append([imgPath, maskPath])
        self.imgs = img
        # self.imgs is a list; each element holds a pair of paths: [image path, label (mask) path]
        self.transforms = transforms
        self.target_transforms = target_transforms
        # the transforms can implement mean subtraction, division by the std,
        # random crop, rotation, flip, affine transform, and so on
    def __getitem__(self, index):
        xPath, yPath = self.imgs[index]
        img_x = Image.open(xPath, 'r')
        img_y = Image.open(yPath, 'r')
        # Image.open reads the image
        if self.transforms is not None:
            img_x = self.transforms(img_x)
        if self.target_transforms is not None:
            img_y = self.target_transforms(img_y)
        return img_x, img_y
    def __len__(self):
        return len(self.imgs)

a = MyDataset(".\\data\\train")
def meanX(a):
    # number of pixels per channel over the whole dataset (all images are 512*512)
    num_pixels = 512 * 512 * len(a)
    R_channel_mean = 0
    G_channel_mean = 0
    B_channel_mean = 0
    R_channel_var = 0
    G_channel_var = 0
    B_channel_var = 0
    # first pass: per-channel mean
    for i in range(len(a)):
        n = np.array(a[i][0])
        R_channel_mean += np.sum(n[:, :, 0])
        G_channel_mean += np.sum(n[:, :, 1])
        B_channel_mean += np.sum(n[:, :, 2])
    R_channel_mean = R_channel_mean / num_pixels
    G_channel_mean = G_channel_mean / num_pixels
    B_channel_mean = B_channel_mean / num_pixels
    # second pass: per-channel standard deviation, using the matching channel for G and B
    for i in range(len(a)):
        n = np.array(a[i][0])
        R_channel_var += np.sum((n[:, :, 0] - R_channel_mean) ** 2)
        G_channel_var += np.sum((n[:, :, 1] - G_channel_mean) ** 2)
        B_channel_var += np.sum((n[:, :, 2] - B_channel_mean) ** 2)
    R_channel_var = np.sqrt(R_channel_var / num_pixels)
    G_channel_var = np.sqrt(G_channel_var / num_pixels)
    B_channel_var = np.sqrt(B_channel_var / num_pixels)
    return R_channel_mean, G_channel_mean, B_channel_mean, R_channel_var, G_channel_var, B_channel_var

R,G,B,RV,GV,BV = meanX(a)
print(R,G,B,RV,GV,BV)

The output is shown below:
(For a grayscale image the three channels give identical values.)
[screenshot of the printed output]
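
The statistics from meanX can then feed transforms.Normalize when building the input pipeline. A minimal sketch, assuming the values are divided by 255 because meanX works on raw 0-255 pixels while ToTensor rescales images to [0, 1] (the training script below simply uses 0.5/0.5 instead):

import torchvision.transforms as transforms

R, G, B, RV, GV, BV = meanX(a)
x_transform = transforms.Compose([
    transforms.ToTensor(),
    # bring the 0-255 statistics into the [0, 1] range that ToTensor produces
    transforms.Normalize(mean=[R / 255, G / 255, B / 255],
                         std=[RV / 255, GV / 255, BV / 255]),
])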
Training on the dataset

import os
import torch.nn as nn
from PIL import Image
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import argparse
from torch.utils.data import Dataset
import numpy as np


class MyDataset(Dataset):
    def __init__(self, root, transform, target_transform):
        dir = os.listdir(root)
        lendir = len(dir)//2
        imgpath = []
        for i in range(lendir):
            imgs = os.path.join(root,"%03d.png"%i)
            mask = os.path.join(root,"%03d_mask.png"%i)
            imgpath.append([imgs,mask])
        self.imgpath = imgpath
        self.transform = transform
        self.target_transform = target_transform
    def __getitem__(self, item):
        imgs,masks = self.imgpath[item]
        img = Image.open(imgs,'r')
        mask = Image.open(masks,'r')
        img = self.transform(img)
        mask = self.target_transform(mask)
        return img,mask
    def __len__(self):
        return len(self.imgpath)

class DoubleConv(nn.Module):
    def __init__(self,num_in,num_out):
        super(DoubleConv,self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(num_in,num_out,3,1,1),
            nn.BatchNorm2d(num_out),
            nn.ReLU(inplace=True),
            nn.Conv2d(num_out,num_out,3,1,1),
            nn.BatchNorm2d(num_out),
            nn.ReLU(inplace=True),
        )
    def forward(self,x):
        return self.conv(x)

class Unet(nn.Module):
    def __init__(self,num_in,num_out):
        super(Unet,self).__init__()
        self.conv1 = DoubleConv(num_in, 64)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = DoubleConv(64, 128)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = DoubleConv(128, 256)
        self.pool3 = nn.MaxPool2d(2, 2)
        self.conv4 = DoubleConv(256, 512)
        self.pool4 = nn.MaxPool2d(2, 2)
        self.conv5 = DoubleConv(512, 1024)
        self.up6 = nn.ConvTranspose2d(1024, 512, 2, 2)
        self.conv6 = DoubleConv(1024, 512)
        self.up7 = nn.ConvTranspose2d(512, 256, 2, 2)
        self.conv7 = DoubleConv(512, 256)
        self.up8 = nn.ConvTranspose2d(256, 128, 2, 2)
        self.conv8 = DoubleConv(256, 128)
        self.up9 = nn.ConvTranspose2d(128, 64, 2, 2)
        self.conv9 = DoubleConv(128, 64)
        self.conv10 = nn.Conv2d(64, num_out, 1, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self,x):
        c1 = self.conv1(x)
        p1 = self.pool1(c1)
        c2 = self.conv2(p1)
        p2 = self.pool2(c2)
        c3 = self.conv3(p2)
        p3 = self.pool3(c3)
        c4 = self.conv4(p3)
        p4 = self.pool4(c4)
        c5 = self.conv5(p4)
        u6 = self.up6(c5)
        merge6 = torch.cat([c4, u6], dim=1)
        c6 = self.conv6(merge6)
        u7 = self.up7(c6)
        merge7 = torch.cat([u7, c3], dim=1)
        c7 = self.conv7(merge7)
        u8 = self.up8(c7)
        merge8 = torch.cat([u8, c2], dim=1)
        c8 = self.conv8(merge8)
        u9 = self.up9(c8)
        merge9 = torch.cat([u9, c1], dim=1)
        c9 = self.conv9(merge9)
        c10 = self.conv10(c9)
        out = self.sigmoid(c10)
        return out


path = "/content/drive/My Drive/Pytorch_try/unet/train"
path_test = "/content/drive/My Drive/Pytorch_try/unet/val"
x_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])
y_transform = transforms.ToTensor()
#y_transform = transforms.ToTensor   # buggy version without parentheses -- see Problem 1 below

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def train():
    model = Unet(3,1).to(device)
    optimizer = torch.optim.Adam(model.parameters(),lr=args.learning_rate)
    dataset = MyDataset(path,x_transform,y_transform)
    criterion = nn.BCELoss()
    dataloader = DataLoader(dataset, batch_size=args.batch_size, num_workers=args.num_workers)
    trainModel(model, optimizer, dataloader, criterion, epoch=args.epoch)

def trainModel(model, optimizer, dataloader, criterion, epoch):
    for i in range(epoch):
        print("epoch{}/{}".format(i+1,epoch))
        step = 0
        dataset_size = len(dataloader.dataset)
        for j,data in enumerate(dataloader):
            optimizer.zero_grad()
            img,mask = data
            step+=1
            inputs = img.to(device)
            labels = mask.to(device)
            outputs = model(inputs)    
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            print("%d/%d,train_loss:%0.3f" % (step, dataset_size // dataloader.batch_size, loss.item()))
        torch.save(model.state_dict(),"weight_%d.pth"%i)

def test():
    model = Unet(3,1)
    model.load_state_dict(torch.load(args.weight,map_location='cpu'))
    dataset = MyDataset(path_test,x_transform,y_transform)
    testDataloader = DataLoader(dataset,batch_size=args.test_batch)
    model.eval()
    acc = 0
    with torch.no_grad():
        for i,data in enumerate(testDataloader):
            img,mask = data
            labels = mask
            inputs = img
            outputs = model(inputs)
            labels_numpy = np.array(labels)
            outputs_numpy = np.array(outputs)       
            #union = labels*outputs
            #acc = 2*union.sum()/(outputs.sum()+labels.sum())
            outputs_numpy = np.int64(outputs_numpy>0.99)
            union = labels_numpy*outputs_numpy
            sum_union = np.sum(union)
            sum_labels = np.sum(labels_numpy)
            sum_outputs = np.sum(outputs_numpy)
            print("union",sum_union,"labels",sum_labels,"outputs",sum_outputs)
            acc += 2*np.sum(union)/(np.sum(labels_numpy)+np.sum(outputs_numpy))
            print(i)
            print(acc/(i+1))



if __name__ == '__main__':
    ps = argparse.ArgumentParser()
    args = ps.parse_known_args()[0]
    args.batch_size = 4
    args.num_workers = 1
    args.learning_rate = 1e-3
    args.epoch = 5
    args.test_batch = 1
    args.weight ="/content/drive/My Drive/Pytorch_try/weight_1.pth"
    train()

[screenshot]

A few errors came up along the way.
Problem 1: TypeError: ToTensor() takes no arguments
[screenshot]
I had forgotten the parentheses on ToTensor().
[screenshot]
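A minimal sketch of the wrong and the corrected line:

import torchvision.transforms as transforms

# wrong: assigns the ToTensor class itself; calling y_transform(mask) later
# then tries ToTensor(mask), whose __init__ takes no arguments
y_transform = transforms.ToTensor
# correct: instantiate the transform, then call it on a PIL image
y_transform = transforms.ToTensor()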
Problem 2: max_pool2d(): argument 'input' (position 1) must be Tensor, not Sequential
[screenshot]
I had forgotten the () that calls the previous layer, so the module itself was passed on instead of its output.
[screenshot]
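The exact offending line is not shown above, so this reconstruction is an assumption, but the kind of mistake that produces this message looks like returning the module instead of calling it:

import torch.nn as nn

class DoubleConvBroken(nn.Module):
    def __init__(self, num_in, num_out):
        super(DoubleConvBroken, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(num_in, num_out, 3, 1, 1),
            nn.ReLU(inplace=True),
        )
    def forward(self, x):
        # wrong: hands the nn.Sequential module itself to the next layer,
        # so MaxPool2d ends up receiving a Sequential instead of a Tensor
        return self.conv
        # correct: return self.conv(x)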
Problem 3: not enough arguments for format string
[screenshot]
The format arguments have to be written inside one pair of parentheses, as a tuple.
[screenshot]
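A quick sketch of the fix (values are illustrative):

step, total = 3, 100
# wrong: only `step` feeds the two %d placeholders -> not enough arguments for format string
# print("%d/%d" % step, total)
# correct: all values go inside one tuple after %
print("%d/%d" % (step, total))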
Note: on cutting down the time cost
(1) Anything done with Python for loops or numpy runs on the CPU and can eat a huge amount of time (see the sketch after this list).
(2) Moving data onto the GPU is itself fairly expensive, i.e. .cuda() calls cost time; drop them where you can and use to(device) with
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
(3) On a server, if the job fits on a single GPU, do not wrap the model with net = nn.DataParallel(net); this multi-GPU parallel mode is slower than running on one GPU.
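
A minimal sketch of the pattern points (1) and (2) suggest: keep the computation as tensor operations on one device instead of round-tripping through numpy and Python loops (the random tensors and the Dice-style score are just illustrative stand-ins):

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

outputs = torch.rand(4, 1, 512, 512, device=device)                    # stand-in for model outputs
labels = torch.randint(0, 2, (4, 1, 512, 512), device=device).float()  # stand-in for masks

# stays on the chosen device: no .numpy(), no per-pixel Python loop
preds = (outputs > 0.99).float()
dice = 2 * (preds * labels).sum() / (preds.sum() + labels.sum())
print(dice.item())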

One more problem remained:
xxxxxx is not implemented for type torch.LongTensor
CrossEntropyLoss would not work here; switching to BCELoss (binary cross-entropy loss) resolved it.
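
I did not pin down the exact op that raised the error, but the two losses do expect different target types, which is easy to trip over. A minimal sketch (shapes illustrative): BCELoss wants float targets the same shape as the sigmoid output, while CrossEntropyLoss wants raw logits plus Long class indices without a channel dimension:

import torch
import torch.nn as nn

outputs = torch.sigmoid(torch.randn(2, 1, 512, 512))   # network output after Sigmoid
masks = torch.randint(0, 2, (2, 1, 512, 512))           # 0/1 masks as a LongTensor

# BCELoss: float targets, same shape as the output
bce = nn.BCELoss()
print(bce(outputs, masks.float()).item())

# CrossEntropyLoss would instead need logits of shape (N, C, H, W)
# and Long class indices of shape (N, H, W) -- a different setup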
