PyTorch入门——Mnist数据集分类

文章目录

      • 一、训练模型、保存模型
      • 二、加载模型、预测数据集图片
      • 三、预测单独一张图片(非数据集)

视频参考: https://www.bilibili.com/video/BV12i4y1G7ko?t=49

一、训练模型、保存模型

# 1 加载相关库
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms  # 对数据库和图片预处理


# 2 Hyperparameters
BATCH_SIZE = 128  # number of samples per mini-batch
# Use the GPU when CUDA is available, otherwise the CPU (the device string is "cuda", not "gpu").
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
EPOCHS = 10  # 60,000 training images per epoch, so 10 epochs process 600,000 images


# 3 Preprocessing pipeline applied to every image
pipeline = transforms.Compose(
    [
        transforms.ToTensor(),  # convert the image to a Tensor
        # Standardize the input with mean 0.1307 and std 0.3081.
        # This is input normalization, not regularization.
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)


# 4 Download and load the data
from torch.utils.data import DataLoader

# If the download fails, simply try again.
train_set = datasets.MNIST(root="./mnist_data/", train=True, transform=pipeline, download=True)
test_set = datasets.MNIST(root="./mnist_data/", train=False, transform=pipeline, download=True)
# Shuffle the training samples so every epoch sees them in a different order.
# The evaluation order does not influence the metrics, so the test loader keeps a fixed order.
train_loader = DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, shuffle=False)  # train/test batch sizes may be set independently


# 5 Preview images from the training set
import matplotlib.pyplot as plt

# Pull a single batch; with the settings above images is expected to be (128, 1, 28, 28).
# Equivalent: idx, (images, labels) = next(enumerate(train_loader))
images, labels = next(iter(train_loader))
first_image = images[0].squeeze().numpy()  # drop the channel axis so matplotlib gets a 2-D array
plt.imshow(first_image)  # plt.imshow(images[0][0]) would work as well
plt.show()
plt.imsave("1.png", images[3].squeeze().numpy())  # write the fourth image of the batch to disk
print(first_image.shape)


# 6 构建网络模型

# 方式1
class Digit(nn.Module):
    """Small CNN for single-channel 28x28 digit images (functional-API variant).

    Layout: conv(5x5) -> ReLU -> max-pool(2) -> conv(3x3) -> ReLU -> flatten
    -> linear(2000, 500) -> ReLU -> linear(500, 10).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, 5)  # 1 input channel (grayscale) -> 10 channels, 5x5 kernel
        self.conv2 = nn.Conv2d(10, 20, 3)  # 10 -> 20 channels, 3x3 kernel
        self.fc1 = nn.Linear(20 * 10 * 10, 500)  # 20 feature maps of 10x10 after conv2
        self.fc2 = nn.Linear(500, 10)  # one score per digit class

    def forward(self, x):
        """Return the raw class scores (logits) with shape (batch, 10).

        No softmax is applied here: F.cross_entropy performs log-softmax
        itself, so feeding it probabilities would squash the gradients.
        argmax over the logits gives the same prediction as over softmax.

        Args:
            x: image batch; the layer sizes assume shape (batch, 1, 28, 28).
        """
        batch = x.size(0)
        # Output size of a convolution: (W - F + 2P) / S + 1 = (28 - 5 + 0) / 1 + 1 = 24
        x = F.relu(self.conv1(x))  # (batch, 1, 28, 28) -> (batch, 10, 24, 24)
        x = F.max_pool2d(x, 2, 2)  # -> (batch, 10, 12, 12)
        x = F.relu(self.conv2(x))  # (12 - 3) / 1 + 1 = 10 -> (batch, 20, 10, 10)
        x = x.view(batch, -1)  # flatten -> (batch, 2000)
        x = F.relu(self.fc1(x))  # -> (batch, 500)
        return self.fc2(x)  # -> (batch, 10)


# 方式2
class Digit2(nn.Module):
    """Small CNN for single-channel 28x28 digit images (module-API variant).

    Layout: conv(5x5) -> ReLU -> max-pool(2) -> conv(3x3) -> ReLU -> flatten
    -> linear(2000, 500) -> ReLU -> linear(500, 10).

    Only conv1, conv2, fc1 and fc2 hold parameters, so the state_dict keys
    are unchanged and existing checkpoints keep loading.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, 5)  # 1 input channel (grayscale) -> 10 channels, 5x5 kernel
        self.conv2 = nn.Conv2d(10, 20, 3)  # 10 -> 20 channels, 3x3 kernel
        self.fc1 = nn.Linear(20 * 10 * 10, 500)  # 20 feature maps of 10x10 after conv2
        self.fc2 = nn.Linear(500, 10)  # one score per digit class
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
        self.relu3 = nn.ReLU()
        self.maxpool2d1 = nn.MaxPool2d(2, 2)
        self.softmax = nn.Softmax(dim=1)  # normalizes across the 10 class columns; used by predict_proba only

    def forward(self, x):
        """Return the raw class scores (logits) with shape (batch, 10).

        No softmax is applied here: F.cross_entropy performs log-softmax
        itself, so feeding it probabilities would squash the gradients.
        argmax over the logits gives the same prediction as over softmax.

        Args:
            x: image batch; the layer sizes assume shape (batch, 1, 28, 28).
        """
        batch = x.size(0)
        # Output size of a convolution: (W - F + 2P) / S + 1 = (28 - 5 + 0) / 1 + 1 = 24
        x = self.relu1(self.conv1(x))  # (batch, 1, 28, 28) -> (batch, 10, 24, 24)
        x = self.maxpool2d1(x)  # -> (batch, 10, 12, 12)
        x = self.relu2(self.conv2(x))  # (12 - 3) / 1 + 1 = 10 -> (batch, 20, 10, 10)
        x = x.view(batch, -1)  # flatten -> (batch, 2000); torch.flatten(x, 1) is equivalent
        x = self.relu3(self.fc1(x))  # -> (batch, 500)
        return self.fc2(x)  # -> (batch, 10)

    def predict_proba(self, x):
        """Return class probabilities (each row sums to 1) for the batch x."""
        return self.softmax(self(x))


# 7 Instantiate the model and its optimizer
model = Digit2()
model = model.to(DEVICE)  # move the parameters to the selected device
optimizer = optim.Adam(params=model.parameters())  # Adam updates the model parameters during training


# 8 定义训练方法
def train_model(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: network to optimize; called on each input batch.
        device: device every batch is moved to before the forward pass.
        train_loader: iterable yielding ``(inputs, labels)`` pairs.
        optimizer: optimizer that owns the model parameters.
        epoch: epoch number, used only in the progress message.

    Prints the loss of every 100th batch (batch 0, 100, 200, ...).
    """
    model.train()  # switch to training mode
    for step, batch in enumerate(train_loader):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()  # gradients accumulate unless they are cleared
        # Multi-class cross-entropy; equivalent to nn.CrossEntropyLoss()(scores, labels).
        # NOTE(review): F.cross_entropy expects raw, unnormalized scores -
        # confirm the model does not apply softmax to its output.
        loss = F.cross_entropy(model(inputs), labels)
        loss.backward()  # back-propagate
        optimizer.step()  # update the parameters
        if step % 100 != 0:
            continue
        # 60000 / 128 is about 469 batches per epoch, so this reports roughly five times.
        # .item() extracts the Python number from the one-element loss tensor.
        print("Train Epoch:{} \t loss:{:.6f}".format(epoch, loss.item()))


# 9 定义测试方法
def test_model(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print mean loss and accuracy.

    Args:
        model: network to evaluate; called on each input batch.
        device: device every batch is moved to before the forward pass.
        test_loader: iterable yielding ``(inputs, labels)`` pairs and exposing
            a sized ``dataset`` attribute (the sample count is read from it).

    The loss is accumulated per sample (``reduction="sum"``) and divided by
    the number of samples. Summing per-batch means and dividing by the
    sample count would under-report the loss by roughly the batch size.
    """
    model.eval()  # switch to evaluation mode
    correct = 0  # number of correctly classified samples
    total_loss = 0.0  # sum of the per-sample losses
    num_samples = len(test_loader.dataset)
    with torch.no_grad():  # evaluation needs neither gradients nor back-propagation
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            total_loss += F.cross_entropy(output, target, reduction="sum").item()
            # Index of the largest score along the class dimension;
            # torch.max(output, dim=1) or output.max(1, keepdim=True) also work.
            pred = output.argmax(dim=1)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss = total_loss / num_samples
    print("Test--Average loss:{:.4f}, Accuracy: {:.3f}\n".format(test_loss, 100.0 * correct / num_samples))


# The "test_" prefix matches pytest's discovery pattern; opt out so that importing
# this helper into a test module does not get it collected as a test case.
test_model.__test__ = False


# 10 Run the training (step 8) and evaluation (step 9) routines once per epoch
for epoch in range(1, EPOCHS + 1):
    train_model(model=model, device=DEVICE, train_loader=train_loader, optimizer=optimizer, epoch=epoch)
    test_model(model=model, device=DEVICE, test_loader=test_loader)

# 11 Save the model: only the learned parameters (state_dict) are written
torch.save(obj=model.state_dict(), f="params_wuhd.pth")

二、加载模型、预测数据集图片



# 1 加载相关库
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms  # 对数据库和图片预处理


# 2 Hyperparameters
BATCH_SIZE = 128  # number of samples per mini-batch
# Use the GPU when CUDA is available, otherwise the CPU (the device string is "cuda", not "gpu").
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


# 3 Preprocessing pipeline applied to every image
pipeline = transforms.Compose(
    [
        transforms.ToTensor(),  # convert the image to a Tensor
        # Standardize the input with mean 0.1307 and std 0.3081.
        # This is input normalization, not regularization.
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)


# 4 Download and load the test split
from torch.utils.data import DataLoader

test_set = datasets.MNIST(root="./mnist_data/", train=False, download=True, transform=pipeline)
# Batches are drawn in shuffled order; the batch size may be chosen independently of training.
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True)


# 5 构建网络
class Digit2(nn.Module):
    """Small CNN for single-channel 28x28 digit images (module-API variant).

    Layout: conv(5x5) -> ReLU -> max-pool(2) -> conv(3x3) -> ReLU -> flatten
    -> linear(2000, 500) -> ReLU -> linear(500, 10).

    Only conv1, conv2, fc1 and fc2 hold parameters, so the state_dict keys
    are unchanged and existing checkpoints keep loading.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, 5)  # 1 input channel (grayscale) -> 10 channels, 5x5 kernel
        self.conv2 = nn.Conv2d(10, 20, 3)  # 10 -> 20 channels, 3x3 kernel
        self.fc1 = nn.Linear(20 * 10 * 10, 500)  # 20 feature maps of 10x10 after conv2
        self.fc2 = nn.Linear(500, 10)  # one score per digit class
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
        self.relu3 = nn.ReLU()
        self.maxpool2d1 = nn.MaxPool2d(2, 2)
        self.softmax = nn.Softmax(dim=1)  # normalizes across the 10 class columns; used by predict_proba only

    def forward(self, x):
        """Return the raw class scores (logits) with shape (batch, 10).

        No softmax is applied here: F.cross_entropy performs log-softmax
        itself, so feeding it probabilities would distort the reported loss.
        argmax over the logits gives the same prediction as over softmax.

        Args:
            x: image batch; the layer sizes assume shape (batch, 1, 28, 28).
        """
        batch = x.size(0)
        # Output size of a convolution: (W - F + 2P) / S + 1 = (28 - 5 + 0) / 1 + 1 = 24
        x = self.relu1(self.conv1(x))  # (batch, 1, 28, 28) -> (batch, 10, 24, 24)
        x = self.maxpool2d1(x)  # -> (batch, 10, 12, 12)
        x = self.relu2(self.conv2(x))  # (12 - 3) / 1 + 1 = 10 -> (batch, 20, 10, 10)
        x = x.view(batch, -1)  # flatten -> (batch, 2000); torch.flatten(x, 1) is equivalent
        x = self.relu3(self.fc1(x))  # -> (batch, 500)
        return self.fc2(x)  # -> (batch, 10)

    def predict_proba(self, x):
        """Return class probabilities (each row sums to 1) for the batch x."""
        return self.softmax(self(x))


# 6 Build the model and restore the trained weights
model = Digit2().to(DEVICE)  # create the model on the selected device
# map_location remaps the stored tensors onto DEVICE, so a checkpoint that was
# saved on a GPU can still be loaded on a CPU-only machine.
model.load_state_dict(torch.load("params_wuhd.pth", map_location=DEVICE))


# 7 定义测试方法
def test_model(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print mean loss and accuracy.

    Args:
        model: network to evaluate; called on each input batch.
        device: device every batch is moved to before the forward pass.
        test_loader: iterable yielding ``(inputs, labels)`` pairs and exposing
            a sized ``dataset`` attribute (the sample count is read from it).

    The loss is accumulated per sample (``reduction="sum"``) and divided by
    the number of samples. Summing per-batch means and dividing by the
    sample count would under-report the loss by roughly the batch size.
    """
    model.eval()  # switch to evaluation mode
    correct = 0  # number of correctly classified samples
    total_loss = 0.0  # sum of the per-sample losses
    num_samples = len(test_loader.dataset)
    with torch.no_grad():  # evaluation needs neither gradients nor back-propagation
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            total_loss += F.cross_entropy(output, target, reduction="sum").item()
            # Index of the largest score along the class dimension;
            # torch.max(output, dim=1) or output.max(1, keepdim=True) also work.
            pred = output.argmax(dim=1)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss = total_loss / num_samples
    print("Test--Average loss:{:.4f}, Accuracy: {:.3f}\n".format(test_loss, 100.0 * correct / num_samples))


# The "test_" prefix matches pytest's discovery pattern; opt out so that importing
# this helper into a test module does not get it collected as a test case.
test_model.__test__ = False


# 8 Run the evaluation on the test set
test_model(model=model, device=DEVICE, test_loader=test_loader)


三、预测单独一张图片(非数据集)


# 1 加载相关库
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision import datasets, transforms  # 对数据库和图片预处理


# 2 Hyperparameters
BATCH_SIZE = 128  # number of samples per mini-batch
# Use the GPU when CUDA is available, otherwise the CPU (the device string is "cuda", not "gpu").
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


# 3 Preprocessing pipeline and loader for custom (non-dataset) images
#  ## Reading single-channel images --- method 1
pipeline = transforms.Compose([transforms.Grayscale(1),  # reduce to one channel; must run on the PIL image, i.e. before ToTensor
                               transforms.Resize((28, 28)),  # the network's fc1 layer assumes 28x28 inputs; no-op for 28x28 files
                               transforms.ToTensor(),  # convert the image to a Tensor
                               transforms.Normalize((0.1307,), (0.3081,))])  # same standardization as used for training (not regularization)
# A sub-folder is required: create mnist_imgs/image/ and put the pictures (1.png, ...) in it.
dataset = datasets.ImageFolder("mnist_imgs", transform=pipeline)
test_loader = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=True)  # num_workers=2 tended to raise errors
# images holds at most 4 pictures, expected shape (<=4, 1, 28, 28).
# NOTE(review): labels presumably come from the folder layout rather than the digits - verify before using them.
images, labels = next(iter(test_loader))
# Equivalent: idx, (images, labels) = next(enumerate(test_loader))
plt.imshow(images[0].squeeze().numpy())
plt.show()


# #  ##读取单通道图片---法2
# # 读取图片 这里是灰度图
# import cv2
# import numpy as np
# import os
# import matplotlib.pyplot as plt
#
# path = "mnist_imgs/image"
# img_path = os.listdir(path)
# data_x = np.zeros((len(img_path), 1, 28, 28), dtype="float32")
# # data_x = np.empty((len(img_path),1,28,28),dtype="float32")
# print(data_x.shape)
# i = 0
# data_y = []
# for item in img_path:
#     img = cv2.imread(os.path.join(path, item), 0)
#     # cv2.imshow("1", img)
#     # cv2.waitKey(10)
#     arr = np.asarray(img, dtype="float32")
#     arr = np.expand_dims(arr, axis=0)
#     print(arr.shape)
#     data_x[i, :, :, :] = arr
#     i += 1
# data_x = data_x / 255.
# data_x = torch.from_numpy(data_x)
# dataset = torch.utils.data.TensorDataset(data_x)
# test_loader = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=True)
# print(test_loader)
# i, images = next(enumerate(test_loader))  # 注意返回没有labels(images, labels) = next(iter(train_loader))
# print(images)
# print(images[0][0].shape)
# # print(images[0].squeeze().numpy().shape)  # plt.imshow(images[0][0])
# plt.imshow(images[0][0].squeeze(axis=0).numpy())
# plt.show()



# 4 构建网络
class Digit2(nn.Module):
    """Small CNN for single-channel 28x28 digit images (module-API variant).

    Layout: conv(5x5) -> ReLU -> max-pool(2) -> conv(3x3) -> ReLU -> flatten
    -> linear(2000, 500) -> ReLU -> linear(500, 10).

    Only conv1, conv2, fc1 and fc2 hold parameters, so the state_dict keys
    are unchanged and existing checkpoints keep loading.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, 5)  # 1 input channel (grayscale) -> 10 channels, 5x5 kernel
        self.conv2 = nn.Conv2d(10, 20, 3)  # 10 -> 20 channels, 3x3 kernel
        self.fc1 = nn.Linear(20 * 10 * 10, 500)  # 20 feature maps of 10x10 after conv2
        self.fc2 = nn.Linear(500, 10)  # one score per digit class
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
        self.relu3 = nn.ReLU()
        self.maxpool2d1 = nn.MaxPool2d(2, 2)
        self.softmax = nn.Softmax(dim=1)  # normalizes across the 10 class columns; used by predict_proba only

    def forward(self, x):
        """Return the raw class scores (logits) with shape (batch, 10).

        No softmax is applied here: argmax over the logits gives the same
        prediction as over softmax, and loss functions such as
        F.cross_entropy expect unnormalized scores.

        Args:
            x: image batch; the layer sizes assume shape (batch, 1, 28, 28).
        """
        batch = x.size(0)
        # Output size of a convolution: (W - F + 2P) / S + 1 = (28 - 5 + 0) / 1 + 1 = 24
        x = self.relu1(self.conv1(x))  # (batch, 1, 28, 28) -> (batch, 10, 24, 24)
        x = self.maxpool2d1(x)  # -> (batch, 10, 12, 12)
        x = self.relu2(self.conv2(x))  # (12 - 3) / 1 + 1 = 10 -> (batch, 20, 10, 10)
        x = x.view(batch, -1)  # flatten -> (batch, 2000); torch.flatten(x, 1) is equivalent
        x = self.relu3(self.fc1(x))  # -> (batch, 500)
        return self.fc2(x)  # -> (batch, 10)

    def predict_proba(self, x):
        """Return class probabilities (each row sums to 1) for the batch x."""
        return self.softmax(self(x))


# 5 Build the model and restore the trained weights
model = Digit2().to(DEVICE)  # create the model on the selected device
# map_location remaps the stored tensors onto DEVICE, so a checkpoint that was
# saved on a GPU can still be loaded on a CPU-only machine.
model.load_state_dict(torch.load("params_wuhd.pth", map_location=DEVICE))


# 6 定义测试方法
def test_model(model, device, test_loader):
    """Predict the class of every image in ``test_loader``.

    Args:
        model: network used for prediction; called on each input batch.
        device: device every batch is moved to before the forward pass.
        test_loader: iterable of batches whose first element is the image
            tensor; any further elements (e.g. labels) are ignored.

    Returns:
        1-D tensor on the CPU with the predicted class index of every image,
        in the order the loader produced them (empty for an empty loader).
        Each batch's shape and predictions are also printed, as before.
    """
    model.eval()  # switch to evaluation mode
    predictions = []
    with torch.no_grad():  # prediction needs neither gradients nor back-propagation
        for batch in test_loader:
            data = batch[0].to(device)  # keep only the images
            print(data.shape)
            output = model(data)
            # Index of the largest score along the class dimension;
            # torch.max(output, dim=1) or output.max(1, keepdim=True) also work.
            pred = output.argmax(dim=1)
            print(pred)
            predictions.append(pred.cpu())
    if not predictions:
        return torch.empty(0, dtype=torch.long)
    return torch.cat(predictions)


# The "test_" prefix matches pytest's discovery pattern; opt out so that importing
# this helper into a test module does not get it collected as a test case.
test_model.__test__ = False


# 7 Run the prediction on the custom images
test_model(model=model, device=DEVICE, test_loader=test_loader)

注意:
(1)torchvision 的 datasets.ImageFolder 可以加载指定文件夹中的图片作为数据集。ImageFolder 把根目录下的每个子文件夹当作一个类别,因此需要在数据集目录(如 mnist_imgs)下再创建一个子目录(如 image),并把图片放在该子目录中。
参考:https://blog.csdn.net/Sophia_11/article/details/107960750

(2)torchvision可以加载灰度图

pipeline = transforms.Compose([transforms.Grayscale(1),
                               transforms.ToTensor(),  # 将图片转为Tensor
                               transforms.Normalize((0.1307,), (0.3081,))]) 

(3)加载数据集出现如下报错:

TypeError: img should be PIL Image. Got <class 'torch.Tensor'>

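原因是 transforms.Grayscale 被放在了 transforms.ToTensor 之后,它收到的已经是 Tensor 而不是 PIL Image。需要调整 transforms.Compose() 中的顺序,让 Grayscale 先于 ToTensor 执行,例如把: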

pipeline = transforms.Compose([transforms.ToTensor(),  # 将图片转为Tensor
							   transforms.Grayscale(1),                               
                               transforms.Normalize((0.1307,), (0.3081,))]) 

换成:

pipeline = transforms.Compose([transforms.Grayscale(1),
                               transforms.ToTensor(),  # 将图片转为Tensor
                               transforms.Normalize((0.1307,), (0.3081,))]) 

参考:https://blog.csdn.net/qq_36468195/article/details/109130331

你可能感兴趣的:(PyTorch入门——Mnist数据集分类)