PyTorch Deep Learning Framework

The core of deep learning is gradient descent (Gradient Descent).

The PyTorch ecosystem

NLP: PyTorch-NLP, AllenNLP

Vision: TorchVision

Graph neural networks: PyTorch Geometric

High-level training APIs: fastai

Deploying PyTorch-trained models: the ONNX format
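
A minimal sketch of what an ONNX export can look like (the model here is a placeholder nn.Linear; any trained nn.Module with a matching dummy input would be used instead):

import torch

model = torch.nn.Linear(4, 2)                        # placeholder for a trained nn.Module
model.eval()
dummy_input = torch.randn(1, 4)                      # an example input of the expected shape
torch.onnx.export(model, dummy_input, "model.onnx")  # write the model in ONNX format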

What can PyTorch do?

GPU acceleration

device=torch.device("cuda")
a=a.to(device) # .to() returns a new tensor, so the result must be reassigned
b=b.to(device)
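
A minimal, self-contained sketch that falls back to the CPU when no GPU is available:

import torch

# use the GPU if one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

a = torch.randn(3, 3)
b = torch.randn(3, 3)

# .to(device) returns a copy on the target device, so the result must be kept
a = a.to(device)
b = b.to(device)

c = a @ b           # this matrix multiply runs on the GPU when device is "cuda"
print(c.device)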

Automatic differentiation

# define the variables: x=1, a=1, b=2, c=3
x=torch.tensor(1.)
a=torch.tensor(1.,requires_grad=True)
b=torch.tensor(2.,requires_grad=True)
c=torch.tensor(3.,requires_grad=True)
y=a**4*x+b*b*x+c
print("before:",a.grad,b.grad,c.grad)
#before: None None None

Compute the gradients with torch.autograd.grad:

grads=torch.autograd.grad(y,[a,b,c])
print("after:",grads[0],grads[1],grads[2])
#after: tensor(4.) tensor(4.) tensor(1.)
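
The same gradients can also be obtained with backward(), which accumulates them into each tensor's .grad attribute instead of returning them (a small sketch with the same variable definitions):

import torch

x = torch.tensor(1.)
a = torch.tensor(1., requires_grad=True)
b = torch.tensor(2., requires_grad=True)
c = torch.tensor(3., requires_grad=True)

y = a**4*x + b*b*x + c
y.backward()                      # gradients are accumulated into each tensor's .grad
print(a.grad, b.grad, c.grad)     # tensor(4.) tensor(4.) tensor(1.)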

Common network layers

nn.Linear
nn.Conv2d
nn.LSTM

nn.ReLU
nn.Sigmoid

nn.Softmax
nn.CrossEntropyLoss
nn.MSELoss

Softmax is an activation function that normalizes a vector of values into a probability distribution whose entries sum to 1. It is commonly used as the last layer of a neural network for multi-class classification, and it is usually combined with the cross-entropy loss.
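
A small illustration (the values are made up): Softmax turns logits into probabilities that sum to 1, while nn.CrossEntropyLoss takes the raw logits directly because it applies log-softmax internally:

import torch
from torch import nn

logits = torch.tensor([[2.0, 0.5, -1.0]])      # raw network outputs (logits) for 3 classes
probs = nn.Softmax(dim=1)(logits)              # normalized to a probability distribution
print(probs, probs.sum())                      # the probabilities sum to 1

# CrossEntropyLoss expects raw logits and integer class labels;
# it applies log-softmax internally, so no explicit Softmax layer is needed before it
target = torch.tensor([0])
loss = nn.CrossEntropyLoss()(logits, target)
print(loss)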

Regression

##Linear Regression
import numpy as np

Define the loss function

##define Loss Function MSE
def compute_error_for_line_given_points(b,w,points):
    totalError=0
    for i in range(0,len(points)):
        x=points[i,0]
        y=points[i,1]
        totalError+=(y-(w*x+b))**2
    return totalError/float(len(points))

Compute the gradients and update the parameters

## compute gradient
def step_gradient(b_current,w_current,points,learningRate):
    b_gradient=0
    w_gradient=0
    N=float(len(points))
    for i in range(0,len(points)):
        x=points[i,0]
        y=points[i,1]
        w_gradient+=-(2/N)*x*(y-((w_current*x)+b_current)) # partial derivative w.r.t. w, averaged over the N points
        b_gradient+=-(2/N)*(y-((w_current*x)+b_current)) # partial derivative w.r.t. b, averaged over the N points
    new_w=w_current-learningRate*w_gradient # update w
    new_b=b_current-learningRate*b_gradient # update b
    return [new_b,new_w]
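
For reference, the loop above implements one step of gradient descent on the mean squared error. In LaTeX notation the loss and its partial derivatives are:

L(w,b) = \frac{1}{N}\sum_{i=1}^{N}\bigl(y_i-(w x_i+b)\bigr)^2

\frac{\partial L}{\partial w} = -\frac{2}{N}\sum_{i=1}^{N} x_i\bigl(y_i-(w x_i+b)\bigr), \qquad
\frac{\partial L}{\partial b} = -\frac{2}{N}\sum_{i=1}^{N}\bigl(y_i-(w x_i+b)\bigr)

and the update rule is w \leftarrow w - \mathrm{lr}\cdot\frac{\partial L}{\partial w}, \; b \leftarrow b - \mathrm{lr}\cdot\frac{\partial L}{\partial b}.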

Iterate

## iterate: run the gradient step for num_iterations rounds
def gradient_descent_runner(points,starting_b,starting_w,learning_rate,num_iterations):
    b=starting_b
    w=starting_w
    for i in range(num_iterations):
        b,w=step_gradient(b,w,np.array(points),learning_rate)
    return [b,w]

The run() function

## run function
def run():
    points = np.genfromtxt("data.csv",delimiter=",")
    learning_rate = 0.0001
    initial_b = 0
    initial_w = 0
    num_iterations = 1000
    print("Starting gradient descent at b={0},w={1},error={2}".format(initial_b,initial_w,
                                                                      compute_error_for_line_given_points(initial_b,initial_w,points)))
    print("Running...")
    [b,w]=gradient_descent_runner(points,initial_b,initial_w,learning_rate,num_iterations)
    print("After {0} iterations b={1},w={2},error={3}".format(num_iterations,b,w,
                                                             compute_error_for_line_given_points(b,w,points)))

if __name__ =='__main__':
    run()
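
Here data.csv is assumed to contain two comma-separated columns (x, y), one sample per row. As an optional sanity check (not part of the original script), the result can be compared with NumPy's closed-form least-squares fit:

points = np.genfromtxt("data.csv", delimiter=",")
w_ls, b_ls = np.polyfit(points[:, 0], points[:, 1], deg=1)   # closed-form least-squares line
print("least-squares solution: w={:.4f}, b={:.4f}".format(w_ls, b_ls))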

Classification

mnist.py

import  torch
from    torch import nn
from    torch.nn import functional as F
from    torch import optim

import  torchvision
from    matplotlib import pyplot as plt

from    utils import plot_image, plot_curve, one_hot



batch_size = 512

# step1. load dataset
train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('mnist_data', train=True, download=True,
                               transform=torchvision.transforms.Compose([
                                   torchvision.transforms.ToTensor(),
                                   torchvision.transforms.Normalize(
                                       (0.1307,), (0.3081,))
                               ])),
    batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('mnist_data/', train=False, download=True,
                               transform=torchvision.transforms.Compose([
                                   torchvision.transforms.ToTensor(),
                                   torchvision.transforms.Normalize(
                                       (0.1307,), (0.3081,))
                               ])),
    batch_size=batch_size, shuffle=False)

x, y = next(iter(train_loader))
print(x.shape, y.shape, x.min(), x.max())
plot_image(x, y, 'image sample')



class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()

        # xw+b
        self.fc1 = nn.Linear(28*28, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        # x: [b, 1, 28, 28]
        # h1 = relu(xw1+b1)
        x = F.relu(self.fc1(x))
        # h2 = relu(h1w2+b2)
        x = F.relu(self.fc2(x))
        # h3 = h2w3+b3
        x = self.fc3(x)

        return x



net = Net()
# [w1, b1, w2, b2, w3, b3]
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)


train_loss = []

for epoch in range(3):

    for batch_idx, (x, y) in enumerate(train_loader):

        # x: [b, 1, 28, 28], y: [512]
        # [b, 1, 28, 28] => [b, 784]
        x = x.view(x.size(0), 28*28)
        # => [b, 10]
        out = net(x)
        # [b, 10]
        y_onehot = one_hot(y)
        # loss = mse(out, y_onehot)
        loss = F.mse_loss(out, y_onehot)

        optimizer.zero_grad()
        loss.backward()
        # w' = w - lr*grad
        optimizer.step()

        train_loss.append(loss.item())

        if batch_idx % 10==0:
            print(epoch, batch_idx, loss.item())

plot_curve(train_loss)
# we get optimal [w1, b1, w2, b2, w3, b3]


total_correct = 0
for x,y in test_loader:
    x  = x.view(x.size(0), 28*28)
    out = net(x)
    # out: [b, 10] => pred: [b]
    pred = out.argmax(dim=1)
    correct = pred.eq(y).sum().float().item()
    total_correct += correct

total_num = len(test_loader.dataset)
acc = total_correct / total_num
print('test acc:', acc)

x, y = next(iter(test_loader))
out = net(x.view(x.size(0), 28*28))
pred = out.argmax(dim=1)
plot_image(x, pred, 'test')
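
The loop above minimizes an MSE loss against one-hot targets, which works for this demo, but cross-entropy is the more common choice for classification. A sketch of the same inner loop using F.cross_entropy (reusing net, optimizer and train_loader from mnist.py above; the one-hot step is then unnecessary):

for batch_idx, (x, y) in enumerate(train_loader):
    x = x.view(x.size(0), 28*28)         # flatten [b,1,28,28] -> [b,784]
    logits = net(x)                      # raw class scores, [b,10]
    loss = F.cross_entropy(logits, y)    # takes logits and integer labels directly

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()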

utils.py

import  torch
from    matplotlib import pyplot as plt


def plot_curve(data):
    fig = plt.figure()
    plt.plot(range(len(data)), data, color='blue')
    plt.legend(['value'], loc='upper right')
    plt.xlabel('step')
    plt.ylabel('value')
    plt.show()



def plot_image(img, label, name):

    fig = plt.figure()
    for i in range(6):
        plt.subplot(2, 3, i + 1)
        plt.tight_layout()
        plt.imshow(img[i][0]*0.3081+0.1307, cmap='gray', interpolation='none')
        plt.title("{}: {}".format(name, label[i].item()))
        plt.xticks([])
        plt.yticks([])
    plt.show()


def one_hot(label, depth=10):
    out = torch.zeros(label.size(0), depth)
    idx = torch.LongTensor(label).view(-1, 1)
    out.scatter_(dim=1, index=idx, value=1)
    return out
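
Note: newer PyTorch versions also provide an equivalent built-in; a drop-in alternative, assuming label is a 1-D integer tensor:

import torch
import torch.nn.functional as F

label = torch.tensor([3, 0, 7])                    # example integer labels
out = F.one_hot(label, num_classes=10).float()     # same result as the one_hot() helper above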

lenet5.py

Define the network structure

CIFAR-10 dataset

import torch
from torch import nn
from torch.nn import functional as F

class Lenet5(nn.Module):
    """
    for cifar10 dataset
    """
    def __init__(self):
        super(Lenet5,self).__init__()

        # convolution unit
        self.conv_unit=nn.Sequential(
            # convolution layer    x: [b,3,32,32] => [b,6,32,32]
            nn.Conv2d(in_channels=3,out_channels=6,kernel_size=5,stride=1,padding=2),
            # pooling layer    [b,6,32,32] => [b,6,16,16]
            nn.MaxPool2d(kernel_size=2,stride=2,padding=0),
            # convolution layer    [b,6,16,16] => [b,16,16,16]
            nn.Conv2d(in_channels=6,out_channels=16,kernel_size=5,stride=1,padding=2),
            # pooling layer    [b,16,16,16] => [b,16,8,8]
            nn.MaxPool2d(kernel_size=2,stride=2,padding=0),
        )

        # fully connected unit
        # the 4-D feature map has to be flattened before the fully connected layers;
        # flatten is done in forward() with .view(b, -1) (newer PyTorch versions also provide nn.Flatten)
        # fc unit
        self.fc_unit=nn.Sequential(
            # fully connected (Linear) layers; the input is the flattened [b,16,8,8]
            nn.Linear(16*8*8,120),
            nn.ReLU(),
            nn.Linear(120,84),
            nn.ReLU(),
            nn.Linear(84,10),
        )

        # softmax() on its own is numerically unstable, so it is not applied here;
        # nn.CrossEntropyLoss already includes the softmax step
        # define loss
        # self.criteon = nn.MSELoss()  # mean squared error; for classification CrossEntropy is usually the better choice
        # self.criteon = nn.CrossEntropyLoss()


    def forward(self,x):
        """
        :param x: [b,3,32,32]
        """
        batch_size=x.size(0)
        # [b,3,32,32] => [b,16,8,8]
        x=self.conv_unit(x)
        # [b,16,8,8] => [b,16*8*8]
        x=x.view(batch_size,16*8*8) # -1 also works; the size is then inferred as 16*8*8
        # [b,16*8*8] => [b,10]
        # the values fed into softmax are conventionally called logits
        logits=self.fc_unit(x)

        # logits [b,10]
        # pred=F.softmax(logits,dim=1)
        # CrossEntropy includes the softmax step, so the loss could be computed in one call:
        # loss= self.criteon(logits,y)
        return logits

def main():
    net = Lenet5()

    # quick shape test
    # tmp=torch.randn(2,3,32,32)
    # out=net(tmp)
    # print(out.shape)


if __name__=='__main__':
    main()

Training and testing procedure

import torch
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader # DataLoader makes it easy to load data in batches
from torch import nn , optim

from lenet5 import Lenet5

def main():
    batch_size = 32
    # load the CIFAR-10 dataset
    # 'cifar' creates a cifar folder under the current directory
    # transform applies preprocessing to the data
    cifar_train=datasets.CIFAR10('cifar',train=True,transform=transforms.Compose([
        transforms.Resize((32,32)), # resize the images
        transforms.ToTensor(), # convert to tensors
    ]),download=True)
    cifar_train=DataLoader(cifar_train,batch_size=batch_size,shuffle=True)

    cifar_test=datasets.CIFAR10('cifar',train=False,transform=transforms.Compose([
        transforms.Resize((32,32)), # resize the images
        transforms.ToTensor(), # convert to tensors
    ]),download=True)
    cifar_test=DataLoader(cifar_test,batch_size=batch_size,shuffle=True)

    # x,label=next(iter(cifar_train)) # iter() gets an iterator, next() gets one batch
    # print('x:',x.shape,'label:',label.shape)

    # device = torch.device('cuda')
    # model = Lenet5().to(device)
    model = Lenet5()

    # loss
    criteon = nn.CrossEntropyLoss()
    # optim.Adam() is a common choice
    optimizer = optim.Adam(model.parameters(),lr=0.001)

    print(model) # print the network structure

    # Some layers behave differently during training and testing, e.g. BatchNorm / Dropout,
    # so it is best to set the model state explicitly with model.train() / model.eval()
    for epoch in range(1000):
        model.train()
        for batchidx,(x,label) in enumerate(cifar_train):
            # x : [b,3,32,32]
            # label : [b]
            # x,label = x.to(device),label.to(device)

            logits=model(x) # this calls the model's forward() method
            # logits [b,10]
            # label [b]
            # pred would be softmax(logits), but cross entropy applies softmax internally
            # compute the loss; loss is a scalar tensor
            loss=criteon(logits,label)

            # backprop
            optimizer.zero_grad() # clear the accumulated gradients
            loss.backward() # backpropagation
            optimizer.step() # update the parameters

        # one epoch finished, print some progress
        print(epoch,loss.item())  # loss is only from the last batch of this epoch, for reference; accuracy is more informative

        # tell PyTorch not to build a computation graph here: no backprop is needed,
        # which is safer and avoids polluting the gradients
        with torch.no_grad():
            model.eval()
            # test
            # evaluate on the test set after every epoch
            # the test data is reused, but testing never updates the parameters
            total_correct=0
            total_num=0
            for x ,label in cifar_test:
                # x : [b,3,32,32]
                # label : [b]
                # x,label=x.to(device) , label.to(device)

                # logits [b,10]
                logits = model(x)
                # take the argmax position of the logits as pred [b]
                pred= logits.argmax(dim=1)
                # compare with the labels: [b] vs [b] => scalar tensor with the number of correct predictions
                # e.g. [2,1,1] eq [2,0,1] => [1,0,1].float().sum() => 2.0, i.e. 2 correct
                total_correct+=torch.eq(pred,label).float().sum().item() # .item() converts to a Python number
                #x.size(0) = b
                total_num+=x.size(0)

            acc=total_correct/total_num
            print("epoch",epoch,"acc :",acc)



if __name__=='__main__':
    main()
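
To train on a GPU, the commented-out device lines above can be enabled; a hedged sketch of the relevant changes (falling back to the CPU when CUDA is unavailable):

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Lenet5().to(device)

# then, inside both the training loop and the test loop:
# x, label = x.to(device), label.to(device)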

ResNet

Residual network

from torch import nn
from torch.nn import functional as F

class ResBlk(nn.Module):
    """
    resnet block
    """

    def __init__(self,ch_in,ch_out,stride=1):
        """
        :param ch_in:
        :param ch_out:
        """
        super(ResBlk,self).__init__()

        self.conv1 = nn.Conv2d(ch_in,ch_out,kernel_size=3,stride=stride,padding=1)
        self.bn1 = nn.BatchNorm2d(ch_out)
        # conv2's input channels equal bn1's output channels; the number of feature-map channels is unchanged
        self.conv2 = nn.Conv2d(ch_out,ch_out,kernel_size=3,stride=1,padding=1)
        self.bn2 = nn.BatchNorm2d(ch_out)
        # BatchNorm2d normalizes the activations to a common range, which makes training easier
        # ResNet blocks generally include BatchNorm2d
        # BatchNorm2d does not change the channel count

        # shortcut projection: needed when the channel count or the spatial size changes,
        # otherwise the element-wise add in forward() would not line up
        self.extra = nn.Sequential()
        if ch_in != ch_out or stride != 1:
            # [b,ch_in,h,w] => [b,ch_out,h/stride,w/stride]
            self.extra = nn.Sequential(
                nn.Conv2d(ch_in,ch_out,kernel_size=1,stride=stride),
                nn.BatchNorm2d(ch_out),
            )


    def forward(self,x):
        """
        :param x: [b,ch,h,w]
        :return:
        """
        out= F.relu(self.bn1(self.conv1(x)))
        out=F.relu(self.bn2(self.conv2(out)))
        # add the shortcut
        # element-wise add: [b,ch_in,h,w] with [b,ch_out,h,w]
        # out = x + out would fail when ch_in and ch_out differ, hence the projection self.extra
        out = self.extra(x) + out

        return out

class ResNet18(nn.Module):

    def __init__(self):
        super(ResNet18,self).__init__()

        # stem / preprocessing layer
        self.conv1 = nn.Sequential(
            nn.Conv2d(3,64,kernel_size=3,stride=3,padding=0),
            nn.BatchNorm2d(64),
        )

        # residual blocks
        # followed by 4 blocks
        # [b,64,h,w] => [b,128,h,w]
        self.blk1 = ResBlk(64,128,stride=2)
        # [b,128,h,w] => [b,256,h,w]
        self.blk2 = ResBlk(128,256,stride=2)
        # [b,256,h,w] => [b,512,h,w]
        self.blk3=ResBlk(256,512,stride=2)
        # [b,512,h,w] => [b,512,h,w]
        self.blk4=ResBlk(512,512,stride=2)
        # h and w also shrink (stride=2); as the number of feature maps grows, the spatial size
        # should decrease so the parameter count does not blow up

        # [b,512*1*1]
        self.outlayer = nn.Linear(512*1*1,10)

    def forward(self,x):
        """
        :param x:
        :return:
        """
        x = F.relu(self.conv1(x))

        # [b,64,h,w] => [b,512,h,w]
        x = self.blk1(x)
        x = self.blk2(x)
        x = self.blk3(x)
        x = self.blk4(x)

        # print("after conv:", x.shape) # [b,512,2,2]
        # [b,512,h,w] => [b,512,1,1] adaptive 自适应,无论h,w输入是什么都会得到1,1
        x=F.adaptive_max_pool2d(x,[1,1])
        # print("after pool:", x.shape)
        x=x.view(x.size(0),-1) # [b,512*1*1]
        x=self.outlayer(x) # 打平后放入全连接层

        return x

def main():
    pass

if __name__=='__main__':
    main()
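
A quick shape check of the network (a hypothetical snippet, not part of the original file):

import torch
from ResNet import ResNet18

net = ResNet18()
tmp = torch.randn(2, 3, 32, 32)    # a dummy batch of CIFAR-sized images
print(net(tmp).shape)              # expected: torch.Size([2, 10])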

Training and testing for ResNet18

import torch
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader # DataLoader makes it easy to load data in batches
from torch import nn , optim

from ResNet import ResNet18

def main():
    batch_size = 32
    # load the CIFAR-10 dataset
    # 'cifar' creates a cifar folder under the current directory
    # transform applies preprocessing to the data
    cifar_train=datasets.CIFAR10('cifar',train=True,transform=transforms.Compose([
        transforms.Resize((32,32)), # resize the images
        transforms.ToTensor(), # convert to tensors
    ]),download=True)
    cifar_train=DataLoader(cifar_train,batch_size=batch_size,shuffle=True)

    cifar_test=datasets.CIFAR10('cifar',train=False,transform=transforms.Compose([
        transforms.Resize((32,32)), # resize the images
        transforms.ToTensor(), # convert to tensors
    ]),download=True)
    cifar_test=DataLoader(cifar_test,batch_size=batch_size,shuffle=True)

    # x,label=next(iter(cifar_train)) # iter() gets an iterator, next() gets one batch
    # print('x:',x.shape,'label:',label.shape)

    # device = torch.device('cuda')
    # model = Lenet5().to(device)
    model = ResNet18()

    # loss
    criteon = nn.CrossEntropyLoss()
    # optim.Adam() is a common choice
    optimizer = optim.Adam(model.parameters(),lr=0.001)

    print(model) # print the network structure

    # Some layers behave differently during training and testing, e.g. BatchNorm / Dropout,
    # so it is best to set the model state explicitly with model.train() / model.eval()
    for epoch in range(1000):
        model.train()
        for batchidx,(x,label) in enumerate(cifar_train):
            # x : [b,3,32,32]
            # label : [b]
            # x,label = x.to(device),label.to(device)

            logits=model(x) # this calls the model's forward() method
            # logits [b,10]
            # label [b]
            # pred would be softmax(logits), but cross entropy applies softmax internally
            # compute the loss; loss is a scalar tensor
            loss=criteon(logits,label)

            # backprop
            optimizer.zero_grad() # clear the accumulated gradients
            loss.backward() # backpropagation
            optimizer.step() # update the parameters

        # one epoch finished, print some progress
        print(epoch,loss.item())  # loss is only from the last batch of this epoch, for reference; accuracy is more informative

        # tell PyTorch not to build a computation graph here: no backprop is needed,
        # which is safer and avoids polluting the gradients
        with torch.no_grad():
            model.eval()
            # test
            # evaluate on the test set after every epoch
            # the test data is reused, but testing never updates the parameters
            total_correct=0
            total_num=0
            for x ,label in cifar_test:
                # x : [b,3,32,32]
                # label : [b]
                # x,label=x.to(device) , label.to(device)

                # logits [b,10]
                logits = model(x)
                # take the argmax position of the logits as pred [b]
                pred= logits.argmax(dim=1)
                # compare with the labels: [b] vs [b] => scalar tensor with the number of correct predictions
                # e.g. [2,1,1] eq [2,0,1] => [1,0,1].float().sum() => 2.0, i.e. 2 correct
                total_correct+=torch.eq(pred,label).float().sum().item() # .item() converts to a Python number
                #x.size(0) = b
                total_num+=x.size(0)

            acc=total_correct/total_num
            print("epoch",epoch,"acc :",acc)



if __name__=='__main__':
    main()
