In PyTorch, a network can be defined by manually creating the parameters w and b yourself, by building layers with nn.Linear, or, most conveniently, by subclassing nn.Module to define your own network structure.
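For completeness, here is a minimal sketch of the manual approach mentioned above. The names w1, b1 and the 784-to-200 layer size are illustrative assumptions chosen to match the examples below, not code from this article.
import torch
from torch.nn import functional as F

# Manually created parameters (illustrative shapes for a 784 -> 200 layer)
w1 = torch.randn(200, 784, requires_grad=True)  # weight, shape [out_features, in_features]
b1 = torch.zeros(200, requires_grad=True)       # bias, shape [out_features]

x = torch.randn(1, 784)        # a flattened 28x28 "image"
h = F.relu(x @ w1.t() + b1)    # equivalent to F.relu(F.linear(x, w1, b1)), shape=[1,200]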
1. Using nn.Linear
import torch
from torch import nn
from torch.nn import functional as F

# Simulate a flattened 28x28 image
x = torch.randn(1, 784)  # shape=[1,784]
# Define three fully connected layers
layer1 = nn.Linear(784, 200)  # 784 input features, 200 output features
layer2 = nn.Linear(200, 200)
layer3 = nn.Linear(200, 10)

x = layer1(x)  # shape=[1,200]
x = F.relu(x, inplace=True)  # inplace=True modifies the tensor in place, saving memory
x = layer2(x)  # shape=[1,200]
x = F.relu(x, inplace=True)
x = layer3(x)  # shape=[1,10]
x = F.relu(x, inplace=True)
2. Subclassing nn.Module
import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms

class MLP(nn.Module):
    """To define your own network, subclass nn.Module"""

    def __init__(self):
        """Define the layer structure in the constructor"""
        super(MLP, self).__init__()
        # Define each layer of the network here; any class that inherits from nn.Module can be added
        self.module = nn.Sequential(
            nn.Linear(784, 200),
            nn.ReLU(inplace=True),
            nn.Linear(200, 200),
            nn.ReLU(inplace=True),
            nn.Linear(200, 10),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        """Define the forward pass"""
        x = self.module(x)
        return x
"""超参数"""
batch_size = 200 # 每批的样本数量
learning_rate = 0.01 # 学习率
epochs = 10 # 跑多少次样本集
"""获取训练集"""
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=True, download=True, # train=True则得到的是训练集
transform=transforms.Compose([ # 进行数据预处理
transforms.ToTensor(), # 这表示转成Tensor类型的数据
transforms.Normalize((0.1307,), (0.3081,)) # 这里是进行数据标准化(减去均值除以方差)
])),
batch_size=batch_size, shuffle=True) # 按batch_size分出一个batch维度在最前面,shuffle=True打乱顺序
"""获取测试集"""
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=batch_size, shuffle=True)
"""训练+测试过程"""
net = MLP()
# 这里net.parameters()得到这个类所定义的网络的参数,各个w和各个b
optimizer = optim.SGD(net.parameters(), lr=learning_rate)
criteon = nn.CrossEntropyLoss()
for epoch in range(epochs):

    """Training"""
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.reshape(-1, 28 * 28)  # flatten the 2D image data
        logits = net(data)                # output of the MLP defined above
        loss = criteon(logits, target)    # nn.CrossEntropyLoss() applies Softmax internally

        optimizer.zero_grad()  # clear the old gradients
        loss.backward()        # backpropagate to compute the gradients
        optimizer.step()       # take an optimization step

        # print progress every 100 batches
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

    """Testing"""
    test_loss = 0  # loss on the test set, reflects how well the model performs
    correct = 0    # number of correctly classified samples
    # for each batch of samples and labels in the test set
    for data, target in test_loader:
        # flatten to shape=[num_samples, 784]
        data = data.reshape(-1, 28 * 28)
        # the forward pass is just running the network we defined
        logits = net(data)
        test_loss += criteon(logits, target).item()
        # the output has 10 scores per sample; take the max over dimension 1
        # logits.data is a Tensor of shape=[batch_size, 10]
        # note that Tensor.max(dim=1) takes the maximum over dimension 1
        # and returns a tuple of two Tensors, each of shape=[batch_size]:
        # the first holds the maximum values, the second holds their indices
        # we want the indices, i.e. the second element of the tuple
        # print(type(logits.data), logits.data.shape, type(logits.data.max(dim=1)))
        # pred = logits.data.max(dim=1)[1]
        pred = logits.argmax(dim=1)
        # equal positions become True; sum() counts the number of True entries
        correct += pred.eq(target.data).sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
Output:
Train Epoch: 0 [0/60000 (0%)] Loss: 2.302509
Train Epoch: 0 [20000/60000 (33%)] Loss: 2.039532
Train Epoch: 0 [40000/60000 (67%)] Loss: 1.327987
Test set: Average loss: 0.0047, Accuracy: 7708/10000 (77%)
Train Epoch: 1 [0/60000 (0%)] Loss: 1.008723
Train Epoch: 1 [20000/60000 (33%)] Loss: 0.930521
Train Epoch: 1 [40000/60000 (67%)] Loss: 0.642327
Test set: Average loss: 0.0032, Accuracy: 8094/10000 (80%)
Train Epoch: 2 [0/60000 (0%)] Loss: 0.639385
Train Epoch: 2 [20000/60000 (33%)] Loss: 0.632108
Train Epoch: 2 [40000/60000 (67%)] Loss: 0.526081
Test set: Average loss: 0.0020, Accuracy: 8961/10000 (89%)
Train Epoch: 3 [0/60000 (0%)] Loss: 0.385401
Train Epoch: 3 [20000/60000 (33%)] Loss: 0.374330
Train Epoch: 3 [40000/60000 (67%)] Loss: 0.247564
Test set: Average loss: 0.0016, Accuracy: 9115/10000 (91%)
Train Epoch: 4 [0/60000 (0%)] Loss: 0.327109
Train Epoch: 4 [20000/60000 (33%)] Loss: 0.421610
Train Epoch: 4 [40000/60000 (67%)] Loss: 0.423223
Test set: Average loss: 0.0014, Accuracy: 9178/10000 (91%)
Train Epoch: 5 [0/60000 (0%)] Loss: 0.301629
Train Epoch: 5 [20000/60000 (33%)] Loss: 0.288995
Train Epoch: 5 [40000/60000 (67%)] Loss: 0.193877
Test set: Average loss: 0.0013, Accuracy: 9231/10000 (92%)
Train Epoch: 6 [0/60000 (0%)] Loss: 0.191751
Train Epoch: 6 [20000/60000 (33%)] Loss: 0.263422
Train Epoch: 6 [40000/60000 (67%)] Loss: 0.322619
Test set: Average loss: 0.0013, Accuracy: 9277/10000 (92%)
Train Epoch: 7 [0/60000 (0%)] Loss: 0.239337
Train Epoch: 7 [20000/60000 (33%)] Loss: 0.161741
Train Epoch: 7 [40000/60000 (67%)] Loss: 0.206128
Test set: Average loss: 0.0012, Accuracy: 9294/10000 (92%)
Train Epoch: 8 [0/60000 (0%)] Loss: 0.263318
Train Epoch: 8 [20000/60000 (33%)] Loss: 0.227442
Train Epoch: 8 [40000/60000 (67%)] Loss: 0.220815
Test set: Average loss: 0.0011, Accuracy: 9333/10000 (93%)
Train Epoch: 9 [0/60000 (0%)] Loss: 0.204114
Train Epoch: 9 [20000/60000 (33%)] Loss: 0.245966
Train Epoch: 9 [40000/60000 (67%)] Loss: 0.261662
Test set: Average loss: 0.0011, Accuracy: 9360/10000 (93%)
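As noted in the training loop, nn.CrossEntropyLoss expects raw logits, because it combines log-Softmax and negative log-likelihood internally. Below is a minimal sketch verifying that equivalence; the batch size of 4 and the sample labels are just illustrative values.
import torch
from torch import nn
from torch.nn import functional as F

logits = torch.randn(4, 10)          # a batch of 4 samples, 10 classes
target = torch.tensor([1, 0, 7, 3])  # class labels

loss1 = nn.CrossEntropyLoss()(logits, target)
loss2 = F.nll_loss(F.log_softmax(logits, dim=1), target)
print(torch.allclose(loss1, loss2))  # True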
Note: distinguish nn.ReLU from F.relu.
These illustrate the two typical styles of PyTorch API: the former is a class; class-style APIs generally live under torch.nn and start with an uppercase letter. The latter is a function; most such functions live under torch.nn.functional and are all lowercase.
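A small sketch contrasting the two styles (the tensor x is just a placeholder):
import torch
from torch import nn
from torch.nn import functional as F

x = torch.randn(1, 200)

# Class style: instantiate the module (typically inside __init__ or nn.Sequential), then call it
y1 = nn.ReLU()(x)

# Function style: call it directly, typically inside forward()
y2 = F.relu(x)

print(torch.equal(y1, y2))  # True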