简单的手写数字识别
本文选用MNIST数据集,基于简单的CNN卷积神经网络,通过两个卷积层,两个池化层和两个全连接层,实现了手写体数字识别,输出每个epoch(训练轮次)的准确率,最后改变池化层和激活函数顺序,对比学习模型的结果。
数据集地址
NIST(美国国家标准与技术研究所) originally designated SD-3 as their training set and SD-1 as their test set.
The MNIST database was constructed from NIST’s Special Database 3 and Special Database 1 which contain binary images of handwritten digits.
The MNIST training set is composed of 30,000 patterns from SD-3 and 30,000 patterns from SD-1. Our test set was composed of 5,000 patterns from SD-3 and 5,000 patterns from SD-1. The 60,000 pattern training set contained examples from approximately 250 writers.
Four files are available on this site:
def load_data(path):
    """Build the MNIST train and test DataLoaders rooted at *path*.

    The dataset is downloaded automatically when not present. Both loaders
    use batch size 64 and shuffling.
    """
    # Canonical MNIST normalization constants (mean, std) over the training set.
    mnist_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    loaders = []
    for is_train in (True, False):
        dataset = datasets.MNIST(
            root=path,
            train=is_train,
            transform=mnist_transform,
            target_transform=None,
            download=True,
        )
        loaders.append(torch.utils.data.DataLoader(
            dataset,
            batch_size=64,
            shuffle=True,
            num_workers=0,
        ))
    return loaders[0], loaders[1]
本实例中用到的网络模型由两个卷积层,两个池化层和两个全连接层组成。(每层原理:https://blog.csdn.net/weixin_37985288/article/details/93318486)
网络结构
class Net(nn.Module):
    """Small CNN for MNIST: two conv layers, two max-pool layers, two FC layers.

    Expects input of shape (batch, 1, 28, 28); returns per-class
    log-probabilities of shape (batch, 10).
    """

    def __init__(self):
        super(Net, self).__init__()
        # 1x28x28 -> 10x24x24 (5x5 kernel, no padding)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        # 10x12x12 -> 20x8x8
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        # 20 * 4 * 4 = 320 features after the second pooling stage.
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return log-probabilities over the 10 digit classes."""
        x = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2)
        x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=2)
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        # Dropout is active only in training mode.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # BUG FIX: normalize over the class dimension (dim=1), not the batch
        # dimension (dim=0). With dim=0 each sample's outputs are not valid
        # log-probabilities and NLL training is distorted.
        return F.log_softmax(x, dim=1)
```
第一层 卷积层(conv1)输入channel数为1,输出channel数选为10,卷积核大小为5\*5。输入为64\*1\*28\*28的张量,输出为64\*10\*24\*24的张量。
经过一个2\*2的最大池化层(max_pool2d),输出张量规模为64\*10\*12\*12。
第二层 卷积层(conv2)输入channel数为10,输出channel数选为20,卷积核大小为5\*5。输入为64\*10\*12\*12的张量,输出为64\*20\*8\*8的张量。
经过一个2\*2的最大池化层(max_pool2d),输出张量规模为64\*20\*4\*4。
激活函数为relu,是目前用的较多的激活函数。常用激活函数:
经过两层卷积后,将所得张量经过两个全连接层,线性映射为1*10的张量,其中每个元素表示该张图片属于相应类别的概率。
def train(model, epoch, train_loader, optimizer):
    """Run one training epoch of SGD on *model*.

    Args:
        model: network to train; switched into train mode here.
        epoch: zero-based epoch index, used only for logging.
        train_loader: iterable of (data, target) mini-batches.
        optimizer: optimizer over model.parameters().
    """
    model.train()
    for idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        # Model outputs log-probabilities, so NLL loss applies directly.
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        # Log once every 50 batches (i.e. on batch indices 49, 99, ...).
        if idx % 50 == 49:
            # .item() extracts the Python float rather than formatting a tensor.
            print('Train epoch: %d Loss: %.3f ' % (epoch + 1, loss.item()))
train函数
输入参数为网络模型model,训练轮次epoch,训练数据集train_loader和优化方式optimizer。
梯度下降算法,每次枚举zero_grad梯度置零,model向前传播求预测值,loss求损失,backward反向传播求梯度,step更新参数(完成单次优化)
每训练50个batch(即idx为49、99……时)输出一次当前损失值
def test(model, test_loader):
model.eval()
correct = 0
for data, target in test_loader:
output = model(data)
predict = output.data.max(1)[1]
correct = correct + predict.eq(target.data).sum()
print('Accuracy: %2d' % (100*correct/10000), '%')
def main():
    """Train the CNN on MNIST for 10 epochs, testing after each epoch."""
    data_base = './Datasets'
    mnist_path = os.path.join(data_base, 'MNIST')
    train_loader, test_loader = load_data(mnist_path)
    model = Net()
    # SGD with momentum 0.5 and learning rate 0.01.
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    for epoch in range(10):
        train(model, epoch, train_loader, optimizer)
        test(model, test_loader)
从main函数中看出,本实例采用SGD优化算法,优化模型的参数,学习率为0.01,动量因子为0.5,训练轮次为10,一轮训练中每次训练后输出损失值,每轮训练后输出一次测试值
accuracy: 98% after 5 epochs
if exchanging pooling and relu, accuracy reaches 98% after 8 epochs
import os
import torchvision.datasets as datasets
import torch.utils.data
from torchvision import transforms
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
class Net(nn.Module):
    """Small CNN for MNIST: two conv layers, two max-pool layers, two FC layers.

    Expects input of shape (batch, 1, 28, 28); returns per-class
    log-probabilities of shape (batch, 10).
    """

    def __init__(self):
        super(Net, self).__init__()
        # 1x28x28 -> 10x24x24 (5x5 kernel, no padding)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        # 10x12x12 -> 20x8x8
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        # 20 * 4 * 4 = 320 features after the second pooling stage.
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return log-probabilities over the 10 digit classes."""
        x = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2)
        x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=2)
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        # Dropout is active only in training mode.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # BUG FIX: normalize over the class dimension (dim=1), not the batch
        # dimension (dim=0). With dim=0 each sample's outputs are not valid
        # log-probabilities and NLL training is distorted.
        return F.log_softmax(x, dim=1)
# Load training and test data
def load_data(path):
    """Build the MNIST train and test DataLoaders rooted at *path*.

    The dataset is downloaded automatically when not present. Both loaders
    use batch size 64 and shuffling.
    """
    # Canonical MNIST normalization constants (mean, std) over the training set.
    mnist_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    loaders = []
    for is_train in (True, False):
        dataset = datasets.MNIST(
            root=path,
            train=is_train,
            transform=mnist_transform,
            target_transform=None,
            download=True,
        )
        loaders.append(torch.utils.data.DataLoader(
            dataset,
            batch_size=64,
            shuffle=True,
            num_workers=0,
        ))
    return loaders[0], loaders[1]
# Train the net
def train(model, epoch, train_loader, optimizer):
    """Run one training epoch of SGD on *model*.

    Args:
        model: network to train; switched into train mode here.
        epoch: zero-based epoch index, used only for logging.
        train_loader: iterable of (data, target) mini-batches.
        optimizer: optimizer over model.parameters().
    """
    model.train()
    for idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        # Model outputs log-probabilities, so NLL loss applies directly.
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        # Log once every 50 batches (i.e. on batch indices 49, 99, ...).
        if idx % 50 == 49:
            # .item() extracts the Python float rather than formatting a tensor.
            print('Train epoch: %d Loss: %.3f ' % (epoch + 1, loss.item()))
# Test the net
def test(model, test_loader):
model.eval()
correct = 0
for data, target in test_loader:
output = model(data)
predict = output.data.max(1)[1]
correct = correct + predict.eq(target.data).sum()
print('Accuracy: %2d' % (100*correct/10000), '%')
def main():
    """Train the CNN on MNIST for 10 epochs, testing after each epoch."""
    data_base = './Datasets'
    mnist_path = os.path.join(data_base, 'MNIST')
    train_loader, test_loader = load_data(mnist_path)
    model = Net()
    # SGD with momentum 0.5 and learning rate 0.01.
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    for epoch in range(10):
        train(model, epoch, train_loader, optimizer)
        test(model, test_loader)


if __name__ == '__main__':
    main()