CV:Pytorch中梯度计算 Pytorch实现线性回归,MNIST数据集分类

Pytorch中梯度计算 Pytorch实现线性回归,MNIST数据集分类

1. Pytorch中梯度计算

import torch
"""
grad_fn:创建该tensor的function。如果一个tensor是直接用torch生成的,而不是由其他张量计算得来的,那么grad_fn为None,该张量称为叶子节点。
requires_grad = True(创建张量时如果不指定,默认为False):为True时,在该张量上进行的各种操作都会被记录下来,即开始追踪其上的所有操作,从而
利用链式法则进行梯度传播;完成计算后,可以调用.backward()来完成所有梯度计算,此Tensor的梯度将累积到.grad属性中。可以用.requires_grad_(True)
来改变.requires_grad的状态。

"""

# 1. The requires_grad and grad_fn attributes of a tensor

a = torch.ones((2, 2))
print(a.requires_grad)
print(a.grad_fn)

# While requires_grad is False, even a tensor produced by an operation has
# grad_fn == None; switch tracking on in place before computing with it.
a.requires_grad_(True)
squared = a * a
a = squared + 2
print(a)
print(a.grad_fn)

# 2. Gradients never have to be derived by hand: build the forward
# computation, call backward(), and autograd fills in .grad on every leaf
# tensor. Those gradients are what gradient descent uses to update weights.
a = torch.tensor([[1.0, 2.0, 3.0]], dtype=torch.float32, requires_grad=True)
b = torch.tensor([[1.0], [2.0], [3.0]], dtype=torch.float32, requires_grad=True)
y = a.mm(b)  # (1, 3) x (3, 1) -> a single-element (1, 1) result
y.backward()
print(a.grad)
print(b.grad)

2. Pytorch实现线性回归

import torch
import numpy as np
import matplotlib.pyplot as plt
from torch.autograd import Variable  # Variable用来封装张量,以便进行梯度计算

# 1. Load the sample data and initialise the parameters w and b.
x_train = np.array([[3.3], [4.4], [5.5], [6.71], [6.93], [4.168],
                    [9.779], [6.182], [7.59], [2.167], [7.042],
                    [10.791], [5.313], [7.997], [3.1]], dtype=np.float32)
y_train = np.array([[1.7], [2.76], [2.09], [3.19], [1.694], [1.573],
                    [3.366], [2.596], [2.53], [1.221], [2.827],
                    [3.465], [1.65], [2.904], [1.3]], dtype=np.float32)

# Plain tensors take part in autograd directly, so the deprecated Variable
# wrapper is not needed. The inputs are data, not parameters, and therefore
# do not require gradients.
x_train = torch.from_numpy(x_train)
y_train = torch.from_numpy(y_train)

# Trainable parameters: requesting requires_grad at creation keeps them leaf
# tensors whose .grad is populated by backward().
w = torch.rand(1, requires_grad=True)
b = torch.ones(1, requires_grad=True)


# 2. Build the linear model
def linear_model(x):
    """Return the prediction ``w * x + b`` using the module-level w and b."""
    scaled = w * x
    return scaled + b


# # 3. 定义损失函数
# def loss(y_p, y):
#     return torch.mean((y_p - y) ** 2)


# y_prediction = linear_model(x_train)
# loss = loss(y_prediction, y_train)
# loss.backward()  # 反向传播求梯度
# # print(w.grad)
# # print(w.grad.zero_())
# 4. Gradient descent
lr = 1e-2           # learning rate; constant, so defined once outside the loop
num_epochs = 20
loss_history = []   # one Python float (the MSE) per epoch

for e in range(num_epochs):
    y_prediction = linear_model(x_train)
    loss = torch.mean((y_prediction - y_train) ** 2)  # mean squared error

    loss.backward()  # back-propagate to fill w.grad and b.grad

    # Update the parameters in place without recording the update itself in
    # the autograd graph.
    with torch.no_grad():
        w -= lr * w.grad
        b -= lr * b.grad

    # .grad accumulates across backward() calls, so it has to be cleared
    # before the next iteration.
    w.grad.zero_()
    b.grad.zero_()

    # Store and print a plain float rather than a tensor: it prints cleanly
    # and plots without relying on implicit tensor conversion.
    loss_value = loss.item()
    print('epoch:{}, loss:{}'.format(e, loss_value))
    loss_history.append(loss_value)

# Left panel: training points and the fitted line. Right panel: loss per
# epoch. The loss is plotted against the epoch index on its own axes; drawing
# it over the x range of the data would put two unrelated quantities on one
# axis.
fig, (ax_fit, ax_loss) = plt.subplots(1, 2, figsize=(10, 4))

ax_fit.scatter(x_train.numpy(), y_train.numpy(), color='red')
x = np.linspace(0, 12, 20)
y = w.item() * x + b.item()  # plain floats keep y a NumPy array
ax_fit.plot(x, y)
ax_fit.set_title('Fitted line')

ax_loss.plot(range(len(loss_history)), loss_history)
ax_loss.set_xlabel('epoch')
ax_loss.set_ylabel('loss')
ax_loss.set_title('Training loss')

plt.show()
3. MNIST分类实战
import torch
import torch.nn as nn
import numpy as np
from torch import optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision.datasets import mnist
from torchvision import transforms
import matplotlib.pyplot as plt
import random


# 1. Define the network architecture
class Net(nn.Module):
    """Fully connected classifier: in_c -> 512 -> 256 -> 128 -> ot_c.

    Each of the three hidden linear layers is followed by an in-place ReLU;
    the output layer returns raw scores with no activation applied.

    Args:
        in_c: Number of input features per sample (default 784).
        ot_c: Number of output scores per sample (default 10).
    """

    def __init__(self, in_c=784, ot_c=10):
        # Run nn.Module's own initialiser before any sub-module is assigned.
        super().__init__()

        # Hidden layer 1: nn.Linear takes (input features, output features).
        self.fc1 = nn.Linear(in_c, 512)
        # inplace=True makes ReLU overwrite its input tensor instead of
        # allocating a new one for the result.
        self.act1 = nn.ReLU(inplace=True)

        # Hidden layer 2: its input width is the previous layer's width.
        self.fc2 = nn.Linear(512, 256)
        self.act2 = nn.ReLU(inplace=True)

        # Hidden layer 3.
        self.fc3 = nn.Linear(256, 128)
        self.act3 = nn.ReLU(inplace=True)

        # Output layer, which is itself just another fully connected layer.
        self.fc4 = nn.Linear(128, ot_c)

    def forward(self, x):
        """Map a batch of inputs of width in_c to ot_c scores per sample."""
        hidden = self.fc1(x)
        hidden = self.act1(hidden)
        hidden = self.fc2(hidden)
        hidden = self.act2(hidden)
        hidden = self.fc3(hidden)
        hidden = self.act3(hidden)
        return self.fc4(hidden)


# 2. Build the network
net = Net()

# 3. Prepare the datasets. Both train_set and test_set can be indexed:
# [idx][0] is the image as a tensor and [idx][1] is its ground-truth label.
train_set = mnist.MNIST(
    './data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_set = mnist.MNIST(
    './data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)
# print(train_set[0][0].numpy().shape)  -> (1, 28, 28)
# print(train_set[3][1])                -> 1

# Data loaders. shuffle=True reshuffles the training images when batches are
# drawn; the test set is read in its stored order.
train_data = DataLoader(dataset=train_set, batch_size=64, shuffle=True)
test_data = DataLoader(dataset=test_set, batch_size=128, shuffle=False)
# print(len(train_data)) -> 938: the 60000 training images form 938 batches
#   of 64; 60000 is not a multiple of 64, so the last batch holds 32 images.
# print(len(test_data))  -> 79: likewise for the 10000 test images.

# Visualise four randomly chosen training images in a 2x2 grid
for position in range(1, 5):
    ax = plt.subplot(2, 2, position)
    idx = random.randrange(len(train_set))  # len(train_set) is 60000
    image_tensor, target = train_set[idx]
    # Drop the leading channel axis: (1, 28, 28) -> (28, 28) for imshow.
    digit_img = image_tensor.numpy().reshape(28, 28)
    ax.imshow(digit_img, interpolation='nearest')
    ax.set_title('label:{}'.format(target), fontsize=10)

plt.show()

# 4. Loss function and optimizer
# Loss: cross entropy.
criterion = nn.CrossEntropyLoss()

# Optimizer: stochastic gradient descent over the network's parameters, with
# a learning rate and a weight decay. Weight decay is L2 regularisation: a
# penalty term added to the loss that shrinks the weights and so reduces
# over-fitting to some degree.
sgd_settings = dict(lr=1e-2, weight_decay=5e-4)
optimizer = optim.SGD(net.parameters(), **sgd_settings)

# 5. Training
losses = []        # mean training loss per epoch
acces = []         # training accuracy per epoch
eval_losses = []   # mean test loss per epoch
eval_acces = []    # test accuracy per epoch
nums_epoch = 25    # number of passes over the training set

for epoch in range(nums_epoch):
    # Per-epoch totals are accumulated per sample rather than per batch, so a
    # smaller final batch does not get the same weight as a full one.
    train_loss_sum = 0.0
    train_correct = 0
    train_seen = 0

    net.train()
    for batch, (img, label) in enumerate(train_data):
        # Flatten each image: (N, 1, 28, 28) -> (N, 784).
        img = img.view(img.size(0), -1)

        # Forward pass: one row of class scores per image, shape (N, 10).
        out = net(img)
        loss = criterion(out, label)

        # Backward pass.
        optimizer.zero_grad()  # clear gradients, which otherwise accumulate
        loss.backward()        # compute gradients
        optimizer.step()       # update the parameters

        # The predicted class is the index of the highest score in each row.
        _, pred = out.max(1)
        num_correct = (pred == label).sum().item()
        batch_size = img.shape[0]

        if (batch + 1) % 200 == 0:
            print('[INFO] Epoch-{}-Batch-{}: Train: Loss-{:.4f}, Accuracy-{:.4f}'.format(
                epoch + 1, batch + 1, loss.item(), num_correct / batch_size))

        # criterion returns the batch mean, so scale it back to a batch sum.
        train_loss_sum += loss.item() * batch_size
        train_correct += num_correct
        train_seen += batch_size

    epoch_train_loss = train_loss_sum / train_seen
    epoch_train_acc = train_correct / train_seen
    losses.append(epoch_train_loss)
    acces.append(epoch_train_acc)

    eval_loss_sum = 0.0
    eval_correct = 0
    eval_seen = 0

    # The test set is only evaluated, never trained on: switch the network to
    # evaluation mode and skip autograd bookkeeping for these forward passes.
    net.eval()
    with torch.no_grad():
        for img, label in test_data:
            img = img.reshape(img.size(0), -1)

            out = net(img)
            loss = criterion(out, label)

            _, pred = out.max(1)
            batch_size = img.shape[0]

            eval_loss_sum += loss.item() * batch_size
            eval_correct += (pred == label).sum().item()
            eval_seen += batch_size

    epoch_eval_loss = eval_loss_sum / eval_seen
    epoch_eval_acc = eval_correct / eval_seen
    eval_losses.append(epoch_eval_loss)
    eval_acces.append(epoch_eval_acc)

    print('[INFO] Epoch-{}: Train: Loss-{:.4f}, Accuracy-{:.4f} | Test: Loss-{:.4f}, Accuracy-{:.4f}'.format(
        epoch + 1, epoch_train_loss, epoch_train_acc, epoch_eval_loss, epoch_eval_acc))

# 6. Visualise how loss and accuracy evolve over the epochs, for both the
# training set (blue) and the test set (red).
fig = plt.figure()
# A figure-level title: calling plt.title() before any subplot exists would
# attach the title to an implicit full-figure axes instead.
fig.suptitle('Train vs. Test', fontsize=12)

ax1 = plt.subplot(1, 2, 1)
ax1.plot(eval_losses, color='r', label='test')
ax1.plot(losses, color='b', label='train')
ax1.set_title('Loss', fontsize=10, color='black')
ax1.set_xlabel('epoch')
ax1.legend()

ax2 = plt.subplot(1, 2, 2)
ax2.plot(eval_acces, color='r', label='test')
ax2.plot(acces, color='b', label='train')
ax2.set_title('Acc', fontsize=10, color='black')
ax2.set_xlabel('epoch')
ax2.legend()

plt.show()

你可能感兴趣的:(CV:Pytorch中梯度计算 Pytorch实现线性回归,MNIST数据集分类)