import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
torch.__version__
'1.2.0'
First, define some hyperparameters.
BATCH_SIZE = 512
EPOCHS = 10
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") # let torch decide whether a GPU is available; a GPU environment is recommended, as it is much faster
print(DEVICE)
cpu
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        root='./data/',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
    ),
    batch_size=BATCH_SIZE,
    shuffle=True
)
print(train_loader)
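The values (0.1307,) and (0.3081,) passed to Normalize are the commonly quoted mean and standard deviation of the MNIST training pixels. A minimal sketch to verify them, assuming the dataset has already been downloaded to ./data/ (the variable names here are illustrative):

# Sketch: verify the MNIST normalization constants (mean ~0.1307, std ~0.3081)
raw_set = datasets.MNIST(root='./data/', train=True, download=True,
                         transform=transforms.ToTensor())
pixels = torch.stack([img for img, _ in raw_set])  # shape [60000, 1, 28, 28], values in [0, 1]
print(pixels.mean().item(), pixels.std().item())   # ~0.1307, ~0.3081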
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        root='./data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
    ),
    batch_size=BATCH_SIZE,
    shuffle=True
)
# This actually builds an iterable that yields one BATCH of data at a time; you can fetch a batch with next(iter(test_loader))
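For example, a quick sanity check that pulls a single batch out of the loader and inspects its shape (not part of training):

# Quick sanity check: grab a single batch from the test loader
images, labels = next(iter(test_loader))
print(images.shape)  # torch.Size([512, 1, 28, 28])
print(labels.shape)  # torch.Size([512])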
Now we build the convolutional network: two convolutional layers followed by two linear layers as the output. The final output has 10 dimensions; we use these ten dimensions as the labels of the ten classes 0-9 to indicate which digit the result is.
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        # input: 1,28,28
        self.conv1 = nn.Conv2d(1, 10, 5)  # 1,28,28 -> 10,24,24 (24 = (28-5)/1 + 1)
        # max pooling in between shrinks the feature map to 12*12 (12 = (24-2)/2 + 1)
        self.conv2 = nn.Conv2d(10, 20, 3)  # 10,12,12 -> 20,10,10 (10 = (12-3)/1 + 1)
        # no pooling after this one; the ReLU activation leaves the size unchanged
        # the data is flattened to (batch, 20*10*10) before entering the linear layers
        self.fc1 = nn.Linear(20*10*10, 500)  # (batch, 2000) -> (batch, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        in_size = x.size(0)  # 512, the batch size; it stays unchanged throughout the forward pass
        x = self.conv1(x)          # 24
        x = F.relu(x)              # 24
        x = F.max_pool2d(x, 2, 2)  # 12
        x = self.conv2(x)          # 10
        x = F.relu(x)              # 10 (shape is now [512, 20, 10, 10] = batch, channel, height, width)
        x = x.view(in_size, -1)    # flatten to (batch, 2000)
        x = self.fc1(x)            # 500
        x = F.relu(x)              # 500
        x = self.fc2(x)            # 10
        x = F.log_softmax(x, dim=1)  # 10 log-probabilities per sample
        return x
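To confirm the shape bookkeeping in the comments above, here is a small sketch that pushes a dummy batch through an (untrained) network; net and dummy are illustrative names:

# Sketch: verify the layer-by-layer shapes with a dummy batch
net = ConvNet()
dummy = torch.randn(4, 1, 28, 28)  # a fake batch of 4 single-channel 28x28 images
out = net(dummy)
print(out.shape)             # torch.Size([4, 10])
print(out.exp().sum(dim=1))  # rows of log_softmax sum to ~1 after exp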
We instantiate a network; after instantiation, the .to method moves it onto the GPU.
For the optimizer, we simply go with Adam.
# model = ConvNet().to(DEVICE)  # commented out here because we are running on CPU
model = ConvNet()
optimizer = optim.Adam(model.parameters())
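Adam is constructed with its defaults here (learning rate 1e-3 in PyTorch). A quick sketch to inspect the model size and the optimizer's settings:

# Sketch: count trainable parameters and show Adam's default learning rate
n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(n_params)                  # ~1.0M parameters, dominated by fc1's 2000*500 weights
print(optimizer.defaults['lr'])  # 0.001, PyTorch's default for Adam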
Next we define the training function, encapsulating all of the training operations inside it.
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # data, target = data.to(device), target.to(device)  # for GPU runs
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        # print the loss every 30 batches
        if (batch_idx + 1) % 30 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            # data, target = data.to(device), target.to(device)  # for GPU runs
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up the loss over the batch
            pred = output.max(1, keepdim=True)[1]  # index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
for epoch in range(1, EPOCHS + 1):
    train(model, DEVICE, train_loader, optimizer, epoch)
    test(model, DEVICE, test_loader)
Train Epoch: 1 [14848/60000 (25%)] Loss: 0.352904
Train Epoch: 1 [30208/60000 (50%)] Loss: 0.207918
Train Epoch: 1 [45568/60000 (75%)] Loss: 0.151159
Test set: Average loss: 0.1085, Accuracy: 9668/10000 (97%)
Train Epoch: 2 [14848/60000 (25%)] Loss: 0.119579
Train Epoch: 2 [30208/60000 (50%)] Loss: 0.077938
Train Epoch: 2 [45568/60000 (75%)] Loss: 0.082806
Test set: Average loss: 0.0636, Accuracy: 9788/10000 (98%)
Train Epoch: 3 [14848/60000 (25%)] Loss: 0.042896
Train Epoch: 3 [30208/60000 (50%)] Loss: 0.034215
Train Epoch: 3 [45568/60000 (75%)] Loss: 0.051862
Test set: Average loss: 0.0660, Accuracy: 9786/10000 (98%)
Train Epoch: 4 [14848/60000 (25%)] Loss: 0.039835
Train Epoch: 4 [30208/60000 (50%)] Loss: 0.051723
Train Epoch: 4 [45568/60000 (75%)] Loss: 0.043836
Test set: Average loss: 0.0470, Accuracy: 9838/10000 (98%)
Train Epoch: 5 [14848/60000 (25%)] Loss: 0.028914
Train Epoch: 5 [30208/60000 (50%)] Loss: 0.041456
Train Epoch: 5 [45568/60000 (75%)] Loss: 0.031073
Test set: Average loss: 0.0380, Accuracy: 9867/10000 (99%)
Train Epoch: 6 [14848/60000 (25%)] Loss: 0.024093
Train Epoch: 6 [30208/60000 (50%)] Loss: 0.029969
Train Epoch: 6 [45568/60000 (75%)] Loss: 0.030132
Test set: Average loss: 0.0366, Accuracy: 9871/10000 (99%)
Train Epoch: 7 [14848/60000 (25%)] Loss: 0.021329
Train Epoch: 7 [30208/60000 (50%)] Loss: 0.012732
Train Epoch: 7 [45568/60000 (75%)] Loss: 0.034118
Test set: Average loss: 0.0416, Accuracy: 9852/10000 (99%)
Train Epoch: 8 [14848/60000 (25%)] Loss: 0.011759
Train Epoch: 8 [30208/60000 (50%)] Loss: 0.025838
Train Epoch: 8 [45568/60000 (75%)] Loss: 0.030025
Test set: Average loss: 0.0368, Accuracy: 9880/10000 (99%)
Train Epoch: 9 [14848/60000 (25%)] Loss: 0.018470
Train Epoch: 9 [30208/60000 (50%)] Loss: 0.016452
Train Epoch: 9 [45568/60000 (75%)] Loss: 0.027346
Test set: Average loss: 0.0334, Accuracy: 9891/10000 (99%)
Train Epoch: 10 [14848/60000 (25%)] Loss: 0.010000
Train Epoch: 10 [30208/60000 (50%)] Loss: 0.004566
Train Epoch: 10 [45568/60000 (75%)] Loss: 0.007505
Test set: Average loss: 0.0308, Accuracy: 9893/10000 (99%)
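With training finished at roughly 99% test accuracy, the same forward pass can classify individual images. A closing sketch, assuming the loop above has run (variable names illustrative):

# Sketch: predict the digit for a single test image with the trained model
model.eval()
with torch.no_grad():
    image, label = next(iter(test_loader))  # grab one test batch
    pred = model(image[:1]).argmax(dim=1)   # keep the batch dim: input is [1, 1, 28, 28]
    print(pred.item(), label[0].item())     # predicted digit vs. ground truth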