# 1 加载相关库
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms # 对数据库和图片预处理
# 2 Hyperparameters
BATCH_SIZE = 128  # samples per mini-batch
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # PyTorch calls the GPU backend "cuda"
EPOCHS = 10  # one epoch is one full pass over the training set

# 3 Preprocessing pipeline applied to every image
pipeline = transforms.Compose([
    transforms.ToTensor(),  # convert the image to a tensor
    # Input standardization (subtract mean, divide by std) -- this is NOT
    # regularization. The constants are presumably the MNIST mean/std.
    transforms.Normalize((0.1307,), (0.3081,)),
])

# 4 Download (when missing) and load the data
from torch.utils.data import DataLoader

train_set = datasets.MNIST(root="./mnist_data/", train=True, transform=pipeline, download=True)  # retry if the download fails
test_set = datasets.MNIST(root="./mnist_data/", train=False, transform=pipeline, download=True)
# Shuffle the training data every epoch; evaluation order does not affect the
# metrics, so the test loader stays in a fixed order.
train_loader = DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, shuffle=False)  # train/test batch sizes may differ if desired

# 5 Preview a few training images
import matplotlib.pyplot as plt

# A separate unshuffled loader keeps the preview (and the saved 1.png) on the
# same samples every run, independent of the shuffled training loader.
preview_loader = DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=False)
images, labels = next(iter(preview_loader))  # images has shape (BATCH_SIZE, 1, 28, 28)
plt.imshow(images[0].squeeze().numpy())
plt.show()
plt.imsave("1.png", images[3].squeeze().numpy())  # write one sample to disk
print(images[0].squeeze().numpy().shape)
# 6 构建网络模型
# 方式1
class Digit(nn.Module):
    """Small CNN digit classifier written with the functional API.

    Layer stack for 1x28x28 inputs:
    conv(1->10, 5x5) -> ReLU -> 2x2 max-pool -> conv(10->20, 3x3) -> ReLU
    -> flatten -> linear(2000->500) -> ReLU -> linear(500->10).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, 5)  # 1 input channel (grayscale), 10 output channels, 5x5 kernel
        self.conv2 = nn.Conv2d(10, 20, 3)
        self.fc1 = nn.Linear(20 * 10 * 10, 500)  # 20 channels of 10x10 features after conv2
        self.fc2 = nn.Linear(500, 10)  # one score per digit class

    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, 10).

        No softmax is applied: ``F.cross_entropy`` normalizes internally, and
        ``argmax`` over logits equals ``argmax`` over probabilities. Use
        ``F.softmax(logits, dim=1)`` when probabilities are required.

        Args:
            x: image batch of shape (batch, 1, 28, 28).
        """
        input_size = x.size(0)
        x = F.relu(self.conv1(x))        # (N, 1, 28, 28) -> (N, 10, 24, 24): (28 - 5) / 1 + 1 = 24
        x = F.max_pool2d(x, 2, 2)        # -> (N, 10, 12, 12)
        x = F.relu(self.conv2(x))        # -> (N, 20, 10, 10): (12 - 3) / 1 + 1 = 10
        x = x.view(input_size, -1)       # flatten to (N, 2000)
        x = F.relu(self.fc1(x))          # -> (N, 500)
        return self.fc2(x)               # -> (N, 10)
# 方式2
class Digit2(nn.Module):
    """Small CNN digit classifier written with module objects for every layer.

    Layer stack for 1x28x28 inputs:
    conv(1->10, 5x5) -> ReLU -> 2x2 max-pool -> conv(10->20, 3x3) -> ReLU
    -> flatten -> linear(2000->500) -> ReLU -> linear(500->10).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, 5)  # 1 input channel (grayscale), 10 output channels, 5x5 kernel
        self.conv2 = nn.Conv2d(10, 20, 3)
        self.fc1 = nn.Linear(20 * 10 * 10, 500)  # 20 channels of 10x10 features after conv2
        self.fc2 = nn.Linear(500, 10)  # one score per digit class
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
        self.relu3 = nn.ReLU()
        self.maxpool2d1 = nn.MaxPool2d(2, 2)
        self.maxpool2d2 = nn.MaxPool2d(2, 2)  # not used by forward; kept so the attribute set is unchanged
        # Not applied inside forward (see below); available for callers that
        # want probabilities: model.softmax(model(x)).
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, 10).

        The softmax is deliberately left out: ``F.cross_entropy`` expects
        unnormalized scores and normalizes internally, so feeding it softmax
        output would normalize twice. ``argmax`` is unaffected.

        Args:
            x: image batch of shape (batch, 1, 28, 28).
        """
        input_size = x.size(0)
        x = self.conv1(x)            # (N, 1, 28, 28) -> (N, 10, 24, 24): (28 - 5) / 1 + 1 = 24
        x = self.relu1(x)
        x = self.maxpool2d1(x)       # -> (N, 10, 12, 12)
        x = self.conv2(x)            # -> (N, 20, 10, 10): (12 - 3) / 1 + 1 = 10
        x = self.relu2(x)
        x = x.view(input_size, -1)   # flatten to (N, 2000)
        x = self.fc1(x)              # -> (N, 500)
        x = self.relu3(x)
        return self.fc2(x)           # -> (N, 10)
# 7 Build the model on the target device and create its optimizer
model = Digit2()
model = model.to(DEVICE)  # move the parameters to the selected device
optimizer = optim.Adam(params=model.parameters())  # Adam with its default settings
# 8 定义训练方法
def train_model(model, device, train_loader, optimizer, epoch):
    """Run one optimization pass over every batch in ``train_loader``.

    Args:
        model: network to train; it is switched to training mode.
        device: device the batches are moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the progress message.
    """
    model.train()
    for step, batch in enumerate(train_loader):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()  # gradients accumulate unless cleared each step
        loss = F.cross_entropy(model(inputs), labels)  # multi-class cross-entropy on the model output
        loss.backward()
        optimizer.step()

        # Report progress on every 100th batch only.
        if step % 100 != 0:
            continue
        print("Train Epoch:{} \t loss:{:.6f}".format(epoch, loss.item()))
# 9 定义测试方法
def test_model(model, device, test_loader):
model.eval() # 模型验证
correct = 0.0 # 准确率
test_loss = 0.0 # 测试损失
with torch.no_grad(): # test不需要计算梯度和不需要进行反向传播,所以不用grad
for batch_index, (data, target) in enumerate(test_loader):
data, target = data.to(device), target.to(device) # 数据部署到device上
output = model(data) # 测试数据
test_loss += F.cross_entropy(output, target).item() # 计算测试损失
pred = output.argmax(dim=1) # 找到最大值的下标,1表示在第二维度查找。torch.max(output, dim=1) 或者 output.max(1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item() # 累计正确率
test_loss /= len(test_loader.dataset)
print("Test--Average loss:{:.4f}, Accuracy: {:.3f}\n".format(test_loss, 100.0*correct/len(test_loader.dataset)))
# 10 Alternate one training pass and one evaluation pass per epoch (epochs are numbered from 1)
epoch_numbers = range(1, EPOCHS + 1)
for epoch in epoch_numbers:
    train_model(model, DEVICE, train_loader, optimizer, epoch)
    test_model(model, DEVICE, test_loader)

# 11 Persist the trained parameters
trained_weights = model.state_dict()
torch.save(trained_weights, "params_wuhd.pth")
# 1 加载相关库
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms # 对数据库和图片预处理
# 2 Hyperparameters
BATCH_SIZE = 128  # samples per mini-batch
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # PyTorch calls the GPU backend "cuda"

# 3 Preprocessing pipeline applied to every image
pipeline = transforms.Compose([
    transforms.ToTensor(),  # convert the image to a tensor
    # Input standardization (subtract mean, divide by std) -- this is NOT
    # regularization. The constants are presumably the MNIST mean/std.
    transforms.Normalize((0.1307,), (0.3081,)),
])

# 4 Download (when missing) and load the test split
from torch.utils.data import DataLoader

test_set = datasets.MNIST(root="./mnist_data/", train=False, transform=pipeline, download=True)
# Evaluation metrics do not depend on sample order, so the data is read in a
# fixed order instead of being reshuffled.
test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, shuffle=False)
# 5 构建网络
class Digit2(nn.Module):
    """Small CNN digit classifier written with module objects for every layer.

    Layer stack for 1x28x28 inputs:
    conv(1->10, 5x5) -> ReLU -> 2x2 max-pool -> conv(10->20, 3x3) -> ReLU
    -> flatten -> linear(2000->500) -> ReLU -> linear(500->10).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, 5)  # 1 input channel (grayscale), 10 output channels, 5x5 kernel
        self.conv2 = nn.Conv2d(10, 20, 3)
        self.fc1 = nn.Linear(20 * 10 * 10, 500)  # 20 channels of 10x10 features after conv2
        self.fc2 = nn.Linear(500, 10)  # one score per digit class
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
        self.relu3 = nn.ReLU()
        self.maxpool2d1 = nn.MaxPool2d(2, 2)
        self.maxpool2d2 = nn.MaxPool2d(2, 2)  # not used by forward; kept so the attribute set is unchanged
        # Not applied inside forward (see below); available for callers that
        # want probabilities: model.softmax(model(x)).
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, 10).

        The softmax is deliberately left out: ``F.cross_entropy`` expects
        unnormalized scores and normalizes internally, so feeding it softmax
        output would normalize twice and distort the reported loss.
        ``argmax`` is unaffected.

        Args:
            x: image batch of shape (batch, 1, 28, 28).
        """
        input_size = x.size(0)
        x = self.conv1(x)            # (N, 1, 28, 28) -> (N, 10, 24, 24): (28 - 5) / 1 + 1 = 24
        x = self.relu1(x)
        x = self.maxpool2d1(x)       # -> (N, 10, 12, 12)
        x = self.conv2(x)            # -> (N, 20, 10, 10): (12 - 3) / 1 + 1 = 10
        x = self.relu2(x)
        x = x.view(input_size, -1)   # flatten to (N, 2000)
        x = self.fc1(x)              # -> (N, 500)
        x = self.relu3(x)
        return self.fc2(x)           # -> (N, 10)
# 6 Build the model and restore the trained parameters
model = Digit2().to(DEVICE)
# map_location remaps the stored tensors onto DEVICE, so a checkpoint written
# on a GPU machine also loads on a CPU-only one.
model.load_state_dict(torch.load("params_wuhd.pth", map_location=DEVICE))
# 7 定义测试方法
def test_model(model, device, test_loader):
model.eval() # 模型验证
correct = 0.0 # 准确率
test_loss = 0.0 # 测试损失
with torch.no_grad(): # test不需要计算梯度和不需要进行反向传播,所以不用grad
for batch_index, (data, target) in enumerate(test_loader):
data, target = data.to(device), target.to(device) # 数据部署到device上
output = model(data) # 测试数据
test_loss += F.cross_entropy(output, target).item() # 计算测试损失
pred = output.argmax(dim=1) # 找到最大值的下标,1表示在第二维度查找。torch.max(output, dim=1) 或者 output.max(1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item() # 累计正确率
test_loss /= len(test_loader.dataset)
print("Test--Average loss:{:.4f}, Accuracy: {:.3f}\n".format(test_loss, 100.0*correct/len(test_loader.dataset)))
# 8 Run the evaluation once with the model, device and loader defined above
test_model(model, DEVICE, test_loader)
# 1 加载相关库
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision import datasets, transforms # 对数据库和图片预处理
# 2 Hyperparameters
BATCH_SIZE = 128  # not used by the loader below, which sets its own batch size of 4
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # PyTorch calls the GPU backend "cuda"

# 3 Preprocessing pipeline
# Reading single-channel images -- method 1
pipeline = transforms.Compose([
    transforms.Grayscale(1),  # reduce to one channel; placed before ToTensor so it receives the PIL image
    transforms.ToTensor(),  # convert the image to a tensor
    # Input standardization (subtract mean, divide by std) -- this is NOT
    # regularization. The constants are presumably the MNIST mean/std.
    transforms.Normalize((0.1307,), (0.3081,)),
])
# The pictures must sit in a sub-folder of mnist_imgs (e.g. mnist_imgs/image/1.png).
dataset = datasets.ImageFolder("mnist_imgs", transform=pipeline)
# shuffle=False keeps the batches in the dataset's own order, so each printed
# prediction can be matched back to its file. (Original author's note:
# num_workers=2 tended to raise errors.)
test_loader = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=False)
images, labels = next(iter(test_loader))  # images holds at most 4 single-channel pictures
plt.imshow(images[0].squeeze().numpy())
plt.show()
# # ##读取单通道图片---法2
# # 读取图片 这里是灰度图
# import cv2
# import numpy as np
# import os
# import matplotlib.pyplot as plt
#
# path = "mnist_imgs/image"
# img_path = os.listdir(path)
# data_x = np.zeros((len(img_path), 1, 28, 28), dtype="float32")
# # data_x = np.empty((len(img_path),1,28,28),dtype="float32")
# print(data_x.shape)
# i = 0
# data_y = []
# for item in img_path:
# img = cv2.imread(os.path.join(path, item), 0)
# # cv2.imshow("1", img)
# # cv2.waitKey(10)
# arr = np.asarray(img, dtype="float32")
# arr = np.expand_dims(arr, axis=0)
# print(arr.shape)
# data_x[i, :, :, :] = arr
# i += 1
# data_x = data_x / 255.
# data_x = torch.from_numpy(data_x)
# dataset = torch.utils.data.TensorDataset(data_x)
# test_loader = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=True)
# print(test_loader)
# i, images = next(enumerate(test_loader)) # 注意返回没有labels(images, labels) = next(iter(train_loader))
# print(images)
# print(images[0][0].shape)
# # print(images[0].squeeze().numpy().shape) # plt.imshow(images[0][0])
# plt.imshow(images[0][0].squeeze(axis=0).numpy())
# plt.show()
# 4 构建网络
class Digit2(nn.Module):
    """CNN digit classifier that outputs per-class probabilities.

    Layer stack for 1x28x28 inputs:
    conv(1->10, 5x5) -> ReLU -> 2x2 max-pool -> conv(10->20, 3x3) -> ReLU
    -> flatten -> linear(2000->500) -> ReLU -> linear(500->10) -> softmax.
    """

    def __init__(self):
        super().__init__()
        # Attribute names must stay as they are: they are the keys of the
        # saved state dict that this script loads.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=3)
        self.fc1 = nn.Linear(in_features=20 * 10 * 10, out_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=10)
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
        self.relu3 = nn.ReLU()
        self.maxpool2d1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.maxpool2d2 = nn.MaxPool2d(kernel_size=2, stride=2)  # defined but not used by forward
        self.softmax = nn.Softmax(dim=1)  # normalize across the 10 class scores of each sample

    def forward(self, x):
        """Map an image batch (batch, 1, 28, 28) to probabilities (batch, 10)."""
        batch = x.size(0)
        # (N, 1, 28, 28) -> conv 5x5 -> (N, 10, 24, 24) -> pool -> (N, 10, 12, 12)
        features = self.maxpool2d1(self.relu1(self.conv1(x)))
        # -> conv 3x3 -> (N, 20, 10, 10)
        features = self.relu2(self.conv2(features))
        # flatten to (N, 2000), then the two fully connected layers
        hidden = self.relu3(self.fc1(features.view(batch, -1)))
        scores = self.fc2(hidden)
        return self.softmax(scores)
# 5 Build the model and restore the trained parameters
model = Digit2().to(DEVICE)
# map_location remaps the stored tensors onto DEVICE, so a checkpoint written
# on a GPU machine also loads on a CPU-only one.
model.load_state_dict(torch.load("params_wuhd.pth", map_location=DEVICE))
# 6 定义测试方法
def test_model(model, device, test_loader):
model.eval() # 模型验证
with torch.no_grad(): # test不需要计算梯度和不需要进行反向传播,所以不用grad
for batch_index, data in enumerate(test_loader):
data = data[0].to(device) # 数据部署到device上
print(data.shape)
output = model(data) # 测试数据
pred = output.argmax(dim=1) # 找到最大值的下标,1表示在第二维度查找。torch.max(output, dim=1) 或者 output.max(1, keepdim=True)
print(pred)
# 7 Run inference once with the model, device and loader defined above
test_model(model, DEVICE, test_loader)
注意:
(1)torchvision 的 datasets.ImageFolder 可以加载指定文件夹中的图片数据集;它把数据集目录下的每个子目录当作一个类别,因此需要在数据集目录下再创建一个子目录(例如 image),并把图片放在该子目录中
参考:https://blog.csdn.net/Sophia_11/article/details/107960750
(2)torchvision可以加载灰度图
pipeline = transforms.Compose([transforms.Grayscale(1),
transforms.ToTensor(), # 将图片转为Tensor
transforms.Normalize((0.1307,), (0.3081,))])
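(3)加载数据集出现如下报错:
TypeError: img should be PIL Image. Got <class 'torch.Tensor'>
原因是 transforms.Grayscale 排在 ToTensor 之后,收到的已经是 Tensor 而不是 PIL Image。
需要调整 transforms.Compose() 中各变换的顺序,把 Grayscale 放到 ToTensor 之前,例如将: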
pipeline = transforms.Compose([transforms.ToTensor(), # 将图片转为Tensor
transforms.Grayscale(1),
transforms.Normalize((0.1307,), (0.3081,))])
换成:
pipeline = transforms.Compose([transforms.Grayscale(1),
transforms.ToTensor(), # 将图片转为Tensor
transforms.Normalize((0.1307,), (0.3081,))])
参考:https://blog.csdn.net/qq_36468195/article/details/109130331