Building ResNet18 in PyTorch to Recognize MNIST: A Tutorial

Autograd: Automatic Differentiation

The core of all neural networks in PyTorch is the autograd package, which provides automatic differentiation for every operation on tensors. torch.Tensor is the central class of this package. If you set .requires_grad to True, every operation on that tensor is tracked. When the computation is finished, calling .backward() computes all the gradients automatically, and the gradients with respect to this tensor are accumulated into its .grad attribute.

import torch

x = torch.ones(2,2,requires_grad = True)
y = x+2
z = y*y*3
out = z.mean()
out.backward()
print(x.grad)
"""
tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])
"""

Building a Neural Network

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.conv1 = nn.Conv2d(1,6,5)
        self.conv2 = nn.Conv2d(6,16,5)
        self.fc1 = nn.Linear(16*5*5,120)
        self.fc2 = nn.Linear(120,84)
        self.fc3 = nn.Linear(84,10)
        
    def forward(self,x):
        x = F.max_pool2d(F.relu(self.conv1(x)),(2,2))
        x = F.max_pool2d(F.relu(self.conv2(x)),(2))
        x = x.view(-1,self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def num_flat_features(self,x):
        size = x.size()[1:]
        num_features= 1
        for s in size:
            num_features*=s
        return num_features
    
net = Net()
print(net)


params = list(net.parameters())
#print(params)
input = torch.randn(1,1,32,32)      # a dummy single-channel 32x32 input
output = net(input)


target = torch.randn(10)            # a dummy target, just for the example
target = target.view(1,-1)          # reshape to (1, 10) so it matches the output
criterion = nn.MSELoss()
loss = criterion(output,target)


net.zero_grad()     # clear the gradients of all parameters
print('\nconv1.bias.grad before backward')
print(net.conv1.bias.grad)
loss.backward()
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

optimizer = optim.SGD(net.parameters(),lr=0.01)
optimizer.zero_grad()   # note: this zeroes the gradients that loss.backward() just computed
print('\n',net.conv1.bias)
optimizer.step()        # with zero gradients the update is a no-op, which is why the bias printed below is unchanged
print(net.conv1.bias)

"""
Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

conv1.bias.grad before backward
None
conv1.bias.grad after backward
tensor([ 0.0152,  0.0140,  0.0029,  0.0044, -0.0089, -0.0060])

 Parameter containing:
tensor([-0.1782, -0.0445, -0.0111,  0.0683,  0.1568,  0.1192],
       requires_grad=True)
Parameter containing:
tensor([-0.1782, -0.0445, -0.0111,  0.0683,  0.1568,  0.1192],
       requires_grad=True)
"""

Why are there both torch.nn and torch.nn.functional?

Take conv2d as an example: in torch.nn, Conv2d is a class, while in torch.nn.functional, conv2d is a function, and the nn version internally calls the functional conv2d. Layers that carry learnable parameters, such as convolutional and fully connected layers, are implemented as nn modules; otherwise you would have to define the weight and bias yourself every time you wanted a convolution, so nn does this repetitive work for you and still calls functional under the hood. Operations that need no trainable parameters, such as relu and max pooling, live in functional.

So both forms are needed: nn is essentially a further wrapper on top of functional.
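
A minimal sketch of the difference (the tensors here are just dummies):

import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.randn(1, 1, 28, 28)

conv = nn.Conv2d(1, 6, kernel_size=5)        # nn.Conv2d is a Module: it creates and stores weight and bias for you
y1 = conv(x)

y2 = F.conv2d(x, conv.weight, conv.bias)     # F.conv2d is a plain function: weight and bias are passed in explicitly
print(torch.allclose(y1, y2))                # True: the module is a thin wrapper around the functional call

y3 = F.max_pool2d(F.relu(y1), 2)             # stateless ops like relu and max pooling need no parameters, so the functional form is enough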

Training on the GPU with torch: working with CUDA

https://blog.csdn.net/qq_21578849/article/details/85240797

There are three main steps (see the sketch after the list):

  1. Move the model to the GPU
  2. Move the data to the GPU
  3. Convert the results from CUDA tensors back to numpy
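
A minimal sketch of the three steps (nn.Linear here is just a stand-in model):

import torch
import torch.nn as nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = nn.Linear(10, 2).to(device)          # 1. move the model to the GPU
x = torch.randn(4, 10).to(device)            # 2. move the data to the GPU
out = model(x)
result = out.detach().cpu().numpy()          # 3. bring the result back to the CPU before converting to numpy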

Multi-GPU training: DataParallel

https://blog.csdn.net/weixin_40087578/article/details/87186613

Parallel computation happens only in the forward pass.

The basic multi-GPU training procedure (see the sketch after the list):

  • First, the model is loaded onto a primary device
  • Read-only replicas of the model are copied to the other devices
  • The large batch of data is split evenly across the devices
  • Finally, the gradients computed on all devices are merged to update the model parameters on the primary device
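
A minimal DataParallel sketch (the linear model and batch are just illustrative):

import torch
import torch.nn as nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = nn.Linear(10, 2)
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)           # replicate the model and split each batch across the visible GPUs
model = model.to(device)

x = torch.randn(128, 10).to(device)          # the batch is scattered to the devices inside forward()
out = model(x)                               # outputs are gathered back on the primary device
loss = out.sum()
loss.backward()                              # gradients from all replicas are reduced onto the primary device's parameters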

The ResNet18 MNIST Example

Environment

torch 1.0.0

torchvision 0.2.0

The example consists of two scripts, mnist.py and myNet.py.

Downloading the MNIST data automatically may require a proxy; alternatively, download the four files yourself and replace the data-loading part of the script with the path to the folder that contains them, as shown in the figure below. The data can also be found in my GitHub repository:

https://github.com/AishuaiYao/PyTorch

[Figure 1: the data-loading code to replace with a local path]

mnist.py

import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data
from torchvision import datasets,transforms
from torchsummary import summary
import myNet

batch_size = 128
epochs = 20
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_data = datasets.MNIST('./data', train=True, download=True,transform=transforms.Compose(
                            [transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))]))
test_data =  datasets.MNIST('./data', train=False, transform=transforms.Compose(
                            [transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))]))


train_loader = torch.utils.data.DataLoader(train_data,batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data,batch_size=batch_size, shuffle=True)

# model = myNet.ConvNet().to(device)    # note: ConvNet ends in log_softmax, so it pairs with F.nll_loss
model = myNet.ResNet().to(device)       # ResNet returns raw logits, matching the F.cross_entropy used below
summary(model,(1,28,28))
optimizer = optim.Adam(model.parameters())

def train(model,device,train_loader,optimizer,epoch):
    model.train()
    for batch_idx,(data,target) in enumerate(train_loader):
        data,target = data.to(device),target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output,target)
        loss.backward()
        optimizer.step()
        if batch_idx % 30 == 0:
            print('train epoch {} : {}/{} \t loss : {:.6f}'.format(
                                                    epoch,batch_idx*len(data),len(train_loader.dataset),loss.item()))


def test(model,device,test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():   # even when requires_grad=True, wrapping the block in torch.no_grad() disables autograd tracking
        for data,target in test_loader:
            data,target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output,target,reduction='sum').item()
            pred = output.max(1,keepdim = True)[1]
            correct +=pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nvalid loss : {:.4f} \t accuracy : {:.3f}%\n'.format(
                                                    test_loss,100. * correct / len(test_loader.dataset)))

for epoch in range(epochs):
    train(model,device,train_loader,optimizer,epoch)
    test(model,device,test_loader)

myNet.py

import torch
import torch.nn as nn
import torch.nn.functional as F


class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1,10,5)
        self.conv2 = nn.Conv2d(10,20,3)
        self.fc1 = nn.Linear(20*10*10,500)
        self.fc2 = nn.Linear(500,10)

    def forward(self, x):
        in_size = x.size(0)
        out = self.conv1(x)
        out = F.relu(out)
        out = F.max_pool2d(out,2,2)

        out = self.conv2(out)
        out = F.relu(out)
        out = out.view(in_size,-1)   # torch.view changes the shape of a tensor, similar to Numpy's reshape

        out = self.fc1(out)
        out = F.relu(out)
        out = self.fc2(out)
        out = F.log_softmax(out,dim = 1)
        return out


class BasicBlock(nn.Module):
    # a standard two-conv residual block; the shortcut becomes a 1x1 conv when the stride or channel count changes
    def __init__(self,inchannel,outchannel,s = 1):
        nn.Module.__init__(self)
        self.left = nn.Sequential(
            nn.Conv2d(inchannel,outchannel,kernel_size=3,stride = s,padding=1),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace = True),
            nn.Conv2d(outchannel,outchannel,kernel_size=3,stride = 1,padding=1),
            nn.BatchNorm2d(outchannel)
        )
        self.shortcut = nn.Sequential()
        if s != 1 or inchannel != outchannel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel,outchannel,kernel_size=1,stride =s),
                nn.BatchNorm2d(outchannel)
            )

    def forward(self,x):
        out = self.left(x)
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    # ResNet18: an initial 7x7 conv, four stages of two BasicBlocks each (two 3x3 convs per block), and a final fc layer
    def __init__(self,residualBlock=BasicBlock,n_class=10):
        nn.Module.__init__(self)
        self.inchannel = 64
        self.conv1  = nn.Sequential(
            nn.Conv2d(1,64,kernel_size=7,stride = 2,padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace = True),
        )
        self.pooling = nn.Sequential(nn.MaxPool2d(kernel_size=3,stride=2,padding=1))

        self.layer1 = self.maker_layer(residualBlock,64,2,s = 1)
        self.layer2 = self.maker_layer(residualBlock,128,2,s = 2)
        self.layer3 = self.maker_layer(residualBlock,256,2,s = 2)
        self.layer4 = self.maker_layer(residualBlock,512,2,s = 2)
        self.fc = nn.Linear(512,n_class)


    def maker_layer(self,block,channels,n_blocks,s):
        # the first block of a stage may downsample with stride s; the remaining blocks keep stride 1
        strides = [s]+[1]*(n_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.inchannel,channels,stride))
            self.inchannel = channels
        return nn.Sequential(*layers)


    def forward(self,x):
        out = self.conv1(x)
        out = self.pooling(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, out.size(2))   # global average pooling over the remaining spatial size (1x1 for 28x28 MNIST input; a fixed kernel of 4 would fail here)
        out = out.view(out.size(0),-1)
        out = self.fc(out)

        return out
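
A quick sanity check on the output shape, which could be appended to the bottom of myNet.py:

if __name__ == '__main__':
    x = torch.randn(1, 1, 28, 28)            # a dummy MNIST-sized input
    model = ResNet()
    print(model(x).shape)                    # torch.Size([1, 10])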

 
