Pytorch之CNN实战

训练脚本Train.py的大致流程

  • 首先要从torchvision包中import datasets和transforms, 用于加载数据集。而这又分两种情况,数据集在Pytorch中已有或者自定义数据集

    • Pytorch中已有(以CIFAR10举例)

      # 'data' and './data' are equivalent: both create a data/ folder under the
      # current directory and download the dataset into it.
      # The transform pipeline resizes every image to 32x32, converts it to a
      # tensor, then standardizes each channel with the given mean and std.
      train_set = datasets.CIFAR10(
          './data',
          train=True,
          download=True,
          transform=transforms.Compose([
              transforms.Resize((32, 32)),
              transforms.ToTensor(),
              transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                   std=[0.229, 0.224, 0.225]),
          ]),
      )
      
    • 自定义数据集(例如Pokemon,与后文对应)

      # Build the custom dataset; the arguments are (root directory, output image size, mode)
      train_set = Pokemon('pokemon', 224, mode='train')
      
  • 然后从torch.utils.data包中import DataLoader,用于一次加载多张照片

    # shuffle=True randomizes the order in which samples are drawn
    # batchsz is the number of images processed per step; too small a value makes training unstable
    train_loader = DataLoader(train_set,batch_size=batchsz,shuffle=True)
    
  • 之后初始化网络,设置gpu,设置优化器及loss。需要从torch中import nn和optim(二者属于torch,而不是torchvision)

    # Use the first GPU when CUDA is available; otherwise fall back to the CPU
    # so that the .to(device) calls below do not fail on machines without a GPU.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # ResNet is the model class being trained (it may be user-defined);
    # `net` refers to this network in the code that follows.
    net = ResNet().to(device)
    # SGD optimizer (Adam can be used instead)
    optimizer = optim.SGD(net.parameters(), lr=learning_rate)
    # cross-entropy loss
    criteon = nn.CrossEntropyLoss().to(device)
    
  • 之后是对网络的训练和评价(以简单的分类问题为例)。

    for epoch in range(epochs):

        # Switch to training mode before the training batches.
        net.train()
        for batchidx, (x, label) in enumerate(train_loader):
            x, label = x.to(device), label.to(device)
            logits = net(x)                 # network output
            loss = criteon(logits, label)   # compute the loss
            # Backpropagation: clear old gradients, compute new ones, update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Reports the loss of the last batch of this epoch.
        print('epoch:{} loss:{}'.format(epoch, loss.item()))

        # Switch to evaluation mode: BatchNorm and dropout are fixed.
        net.eval()
        with torch.no_grad():  # no gradient computation during evaluation
            total_correct = 0
            total_num = 0
            for x, label in test_loader:
                x, label = x.to(device), label.to(device)
                logits = net(x)  # calling net(...) invokes net's forward method
                # argmax over dim=1 returns, for every row, the column index of
                # the largest logit, i.e. the predicted class, so pred can be
                # compared with label directly.
                pred = logits.argmax(dim=1)
                # eq compares element-wise (1 if equal, 0 otherwise); the sum is
                # the number of correct predictions in this batch.
                correct = torch.eq(pred, label).float().sum().item()
                total_correct += correct
                # size of dimension 0 of x, i.e. the number of samples in the batch
                total_num += x.size(0)
            acc = total_correct/total_num
            print('epoch:{} acc:{}'.format(epoch, acc))
    
  • 如果要保存最佳模型参数,则在每个epoch训练后进行验证,如果发现验证集准确率高于最好准确率,更新最好准确率,存储此时的模型参数,使用时再将其加载。

    # Save the model parameters to 'best.mdl' in the current directory
    torch.save(net.state_dict(),'best.mdl')
    # After all training has finished, load the best parameters for testing
    net.load_state_dict(torch.load('best.mdl'))
    

CNN模型

下面写一下一个CNN模型有哪些步骤。首先,所有的自定义模型都需要继承nn.Module,需要实现__init__方法和forward方法(初始化和前向传播)。下面先以Lenet5举例,之后再给出一个手写ResNet18的例子。

  • __init__方法

    def __init__(self, ch_in, ch_out, stride=1):
        """Create the layers of the Lenet5 example network.

        Args:
            ch_in: input channel count of the additional ``conv1`` layer.
            ch_out: output channel count of the additional ``conv1``/``conv2`` layers.
            stride: stride of the additional ``conv1`` layer.
        """
        super(Lenet5, self).__init__()  # mandatory: initializes nn.Module
        # A member can be a single layer or an nn.Sequential grouping several layers.
        # For a 32x32 input the shapes are:
        # [b, 3, 32, 32] => [b, 16, 28, 28] => [b, 16, 14, 14]
        #                => [b, 32, 10, 10] => [b, 32, 5, 5]
        self.conv_unit = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=5, stride=1, padding=0),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=0),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
        )
        # Every Linear's in_features must equal the previous layer's
        # out_features: 32*5*5 => 120 => 84 => 10.
        self.fc_unit = nn.Sequential(
            nn.Linear(32*5*5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10)
        )
        # NOTE(review): the four layers below mirror the residual-block example
        # and do not appear to be used by this model's forward(); confirm
        # whether they are needed here.
        self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(ch_out)
        self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(ch_out)
    
  • forward方法

    def forward(self, x):
        """Run the convolutional stack, flatten per sample, then classify.

        Args:
            x: input batch; dimension 0 is the batch dimension.

        Returns:
            The output of ``self.fc_unit`` applied to the flattened features.
        """
        features = self.conv_unit(x)
        # Keep the batch dimension and merge all remaining dimensions,
        # e.g. [b, 32, 5, 5] => [b, 32*5*5] for 32x32 inputs.
        flat = features.view(features.size(0), -1)
        # [b, 32*5*5] => class scores
        return self.fc_unit(flat)
    
  • 神经网络层的函数

    • nn.Conv2d: 2维卷积

      #nn.Conv2d(输入channel,输出channel,卷积核大小,stride,padding)
      
    • nn.MaxPool2d:2维池化

      #nn.MaxPool2d(池化窗口大小,stride,padding)
      
    • nn.BatchNorm2d:将数据norm到一定范围内,防止梯度弥散(梯度消失)

      #nn.BatchNorm2d(期望输入特征数(一般是channel数),eps,动量,affine仿射)
      
    • nn.Dropout:随机失活

      #nn.Dropout(p(被置零的节点比例,默认0.5),inplace(是否原地操作,默认False))
      
  • 举一个手写ResNet18的例子(ResBlk结构如下图所示)

Pytorch之CNN实战_第1张图片

class ResBlk(nn.Module):
    """Residual block: two 3x3 conv + BatchNorm layers plus a shortcut connection.

    Args:
        ch_in: number of input channels.
        ch_out: number of output channels.
        stride: stride of the first convolution; stride=2 halves the spatial
            size, which keeps the amount of computation from growing while the
            channel count increases.
    """

    def __init__(self, ch_in, ch_out, stride=1):
        super(ResBlk, self).__init__()
        self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(ch_out)
        self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(ch_out)

        # Identity shortcut by default. A 1x1 projection is needed whenever the
        # main path changes the tensor shape: either the channel count differs
        # or stride != 1 shrinks the spatial size. Checking only the channels
        # leaves e.g. ResBlk(512, 512, stride=2) adding tensors of different
        # spatial sizes (an error, or silent broadcasting when one side is 1x1).
        self.extra = nn.Sequential()
        if stride != 1 or ch_out != ch_in:
            self.extra = nn.Sequential(
                nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=stride),
                nn.BatchNorm2d(ch_out)
            )

    def forward(self, x):
        """Return relu(shortcut(x) + main_path(x))."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # shortcut, element-wise addition: [b, ch_in, h, w] => [b, ch_out, h', w']
        out = self.extra(x) + out
        out = F.relu(out)
        return out
    
class ResNet18(nn.Module):
    """Small ResNet-style classifier: conv stem, four ResBlk stages, linear head.

    The stem maps 3 input channels to 64; the stages widen the features to
    512 channels, each with stride 2; the head outputs 10 class scores.
    """

    def __init__(self):
        super(ResNet18, self).__init__()
        # stem: 3x3 convolution with stride 3 and no padding, then BatchNorm
        stem_conv = nn.Conv2d(3, 64, kernel_size=3, stride=3, padding=0)
        self.conv1 = nn.Sequential(stem_conv, nn.BatchNorm2d(64))
        # four residual stages
        self.blk1 = ResBlk(64, 128, stride=2)
        self.blk2 = ResBlk(128, 256, stride=2)
        self.blk3 = ResBlk(256, 512, stride=2)
        self.blk4 = ResBlk(512, 512, stride=2)
        # output layer: 512 pooled features => 10 classes
        self.outlayer = nn.Linear(512, 10)

    def forward(self, x):
        """Map an image batch [b, 3, h, w] to class scores [b, 10]."""
        feat = F.relu(self.conv1(x))
        # [b, 64, h, w] => [b, 512, h', w']
        for stage in (self.blk1, self.blk2, self.blk3, self.blk4):
            feat = stage(feat)
        # global average pooling forces the spatial size to 1x1: => [b, 512, 1, 1]
        pooled = F.adaptive_avg_pool2d(feat, [1, 1])
        flat = pooled.view(pooled.size(0), -1)
        return self.outlayer(flat)

自定义数据集

以Pokemon数据集为例:

  • 顶层目录为:Pokemon
  • 下层目录为:5个类别,每个类别文件夹下有该类别的许多张图片
  • 自定义数据集,继承Dataset,一定要实现__init__(), __len__(), __getitem__()方法
class Pokemon(Dataset):
    """Dataset over an image folder laid out as ``root/<class name>/<image>``.

    On first use the shuffled (image path, label) pairs are written to
    ``root/images.csv``; later runs read that file back, so the order is
    stable across runs. The list is then split 60/20/20 into
    train/val/test.
    """

    def __init__(self, root, resize, mode):
        """
        Args:
            root: top-level directory with one sub-directory per class.
            resize: side length of the square image produced by __getitem__.
            mode: 'train' selects the first 60%, 'val' the next 20%; any
                other value selects the last 20%.
        """
        super(Pokemon, self).__init__()
        self.root = root
        self.resize = resize
        # class (sub-directory) name => integer label, numbered in sorted order
        self.name2label = {}
        for name in sorted(os.listdir(os.path.join(root))):
            if not os.path.isdir(os.path.join(root, name)):  # skip plain files
                continue
            self.name2label[name] = len(self.name2label)
        # parallel lists: image paths and their labels
        self.images, self.labels = self.load_csv('images.csv')
        # three modes share the same split points
        total = len(self.images)
        train_end, val_end = int(0.6 * total), int(0.8 * total)
        if mode == 'train':  # 60%
            self.images = self.images[:train_end]
            self.labels = self.labels[:train_end]
        elif mode == 'val':  # 20% = 60% -> 80%
            self.images = self.images[train_end:val_end]
            self.labels = self.labels[train_end:val_end]
        else:  # 20% = 80% -> 100%
            self.images = self.images[val_end:]
            self.labels = self.labels[val_end:]

    def load_csv(self, filename):
        """Return (images, labels) read from ``root/filename``, creating it if missing.

        Args:
            filename: name of the CSV file inside ``self.root``.

        Returns:
            Two equally long lists: image paths and integer labels.
        """
        csv_path = os.path.join(self.root, filename)
        # first run: collect the images, shuffle them, and save the list
        if not os.path.exists(csv_path):
            images = []
            for name in self.name2label:
                # collected paths look like 'pokemon\\mewtwo\\00001.png'
                # glob returns every file in the class folder matching the pattern
                images += glob.glob(os.path.join(self.root, name, '*.png'))
                images += glob.glob(os.path.join(self.root, name, '*.jpg'))
            random.shuffle(images)
            with open(csv_path, mode='w', newline='') as f:
                writer = csv.writer(f)
                for img in images:
                    # img: 'pokemon\\bulbasaur\\00000000.png'; the parent
                    # folder name is the class name
                    name = img.split(os.sep)[-2]
                    label = self.name2label[name]
                    writer.writerow([img, label])
                print('writen into csv file:', filename)
        # read the (path, label) rows back from the csv file
        images, labels = [], []
        with open(csv_path, newline='') as f:
            reader = csv.reader(f)
            for row in reader:
                # 'pokemon\\bulbasaur\\00000000.png', 0
                img, label = row
                images.append(img)
                labels.append(int(label))
        assert len(images) == len(labels)
        return images, labels

    def __len__(self):
        """Number of samples in the selected split."""
        return len(self.images)

    def denormalize(self, x_hat):
        """Undo the Normalize step of __getitem__: x = x_hat * std + mean."""
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        # [3] => [3, 1, 1] so the values broadcast over height and width
        mean = torch.tensor(mean).unsqueeze(1).unsqueeze(1)
        std = torch.tensor(std).unsqueeze(1).unsqueeze(1)
        x = x_hat * std + mean
        return x

    def __getitem__(self, idx):
        """Load, augment and normalize the idx-th image; return (image, label) tensors."""
        img, label = self.images[idx], self.labels[idx]
        enlarged = int(self.resize * 1.25)
        tf = transforms.Compose([
            # string path => image data
            lambda x: Image.open(x).convert('RGB'),
            # The target size must be passed as one (h, w) tuple; a second
            # positional argument would be taken as the interpolation mode.
            transforms.Resize((enlarged, enlarged)),
            transforms.RandomRotation(15),
            transforms.CenterCrop(self.resize),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        img = tf(img)
        label = torch.tensor(label)
        return img, label
# Total number of elements across all parameter tensors of the network
num = sum(p.numel() for p in net.parameters())

迁移学习

比如ImageNet的数据集和Pokemon数据集的分布比较接近,我就可以使用ImageNet上训练好的模型来解决这个任务,这就是迁移学习。迁移学习的重点在于迁移什么知识以及如何迁移知识,对于如何迁移,我们可以有很多方法。比如取AlexNet输出直接加SVM分类,也可以固定前面参数,重新单独训练最后一层,其中后面这种方法叫做fine-tuning。

# NOTE(review): Flatten must already be defined when this statement runs.
# NOTE(review): nothing in this snippet freezes the pretrained parameters; if
# only the new last layer should be trained, confirm how the optimizer is set up.
trained_model = resnet18(pretrained=True)
model = nn.Sequential( # keep every child module except the last one, then add a new final layer to train
    *list(trained_model.children())[:-1], #[b, 512, 1, 1]
    Flatten(), # [b, 512, 1, 1] => [b, 512]
    nn.Linear(512, 5) # makes the number of outputs fit this task
).to(device)
# Flatten 层如下
class Flatten(nn.Module):
    """Merge every dimension after the batch dimension: [b, d1, d2, ...] => [b, d1*d2*...]."""

    def __init__(self):
        super(Flatten, self).__init__()

    def forward(self, x):
        """Return a view of ``x`` with shape [x.size(0), -1]."""
        # Fix the batch dimension and let view() infer the rest. This avoids
        # building a tensor from the shape and calling .item() on every forward
        # pass, and it also handles 1-D input ([b] => [b, 1]), where the
        # product of an empty shape came back as the float 1.0.
        return x.view(x.size(0), -1)
# (上面的 Flatten 层即用于前面 nn.Sequential 中的展平步骤)


你可能感兴趣的:(Pytorch)