Multilayer Perceptron
A multilayer perceptron (MLP) adds one or more hidden layers to a single-layer neural network. A plain linear model is a single-layer network, while a multilayer perceptron refers to any network with more than one layer. In code, the only difference from the linear model is the extra hidden layer added when the network is built; the remaining steps are essentially the same.
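Concretely, the forward computation of a two-layer MLP applies an activation function to the hidden layer's affine output before passing it on to the output layer. A minimal sketch with dummy tensors (the sizes 784, 256 and 10 match the setup used later in this post):

import torch
X = torch.randn(2, 784)                           # dummy batch of 2 flattened images
W1, b1 = torch.randn(784, 256), torch.zeros(256)  # hidden-layer parameters
W2, b2 = torch.randn(256, 10), torch.zeros(10)    # output-layer parameters
H = torch.relu(X @ W1 + b1)                       # hidden layer with ReLU activation
O = H @ W2 + b2                                   # output logits
print(O.shape)                                    # torch.Size([2, 10])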
Code
1. Import libraries
#Classify fashion_mnist with a multilayer perceptron
#Import libraries
import torch
import numpy as np
import sys
from torch import nn
from torch.nn import init
import torchvision
import torchvision.transforms as transforms
2. Load the fashion_mnist dataset and class labels. Since I have already downloaded the dataset, download is set to False; set it to True otherwise.
#Continue using the fashion_mnist dataset
mnist_train = torchvision.datasets.FashionMNIST(root='~/Desktop/OpenCV_demo/Datasets/FashionMNIST', train=True, download=False, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='~/Desktop/OpenCV_demo/Datasets/FashionMNIST', train=False, download=False, transform=transforms.ToTensor())
#The 10 classes in the dataset
def get_fashion_mnist_labels(labels):
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]
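For example, a quick check of the helper (the indices here are arbitrary):

print(get_fashion_mnist_labels([0, 5, 9]))   # ['t-shirt', 'sandal', 'ankle boot']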
3. Set batch_size
#Batch size and data loaders
batch_size = 256
if sys.platform.startswith('win'):
    num_workers = 0   # multi-process data loading can be problematic on Windows
else:
    num_workers = 4
train_iter = torch.utils.data.DataLoader(
    mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_iter = torch.utils.data.DataLoader(
    mnist_test, batch_size=batch_size, shuffle=True, num_workers=num_workers)
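To confirm the loaders work, you can peek at one batch (a quick sanity check):

X, y = next(iter(train_iter))
print(X.shape, y.shape)   # torch.Size([256, 1, 28, 28]) torch.Size([256])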
4. Initialize the parameters w1, b1, w2, b2; this is where the hidden layer is added
#Input size 28*28=784, output size 10 (classes), 256 hidden units: a two-layer network
num_inputs, num_outputs, num_hiddens = 784, 10, 256
w1 = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_hiddens)), dtype=torch.float)
b1 = torch.zeros(num_hiddens, dtype=torch.float)
w2 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_outputs)), dtype=torch.float)
b2 = torch.zeros(num_outputs, dtype=torch.float)
params = [w1, b1, w2, b2]
for param in params:
    param.requires_grad_(requires_grad=True)
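An optional check that every parameter has the expected shape and tracks gradients:

for param in params:
    print(param.shape, param.requires_grad)
# torch.Size([784, 256]) True ... torch.Size([10]) True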
5. Define the activation function, loss function, and optimizer
#Define the ReLU activation function
def relu(X):
    return torch.max(input=X, other=torch.tensor(0.0))   # element-wise max(x, 0)
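A small check of the hand-written relu (the input values are arbitrary):

print(relu(torch.tensor([-1.0, 0.0, 2.0])))   # tensor([0., 0., 2.])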
#Define the loss function
loss = torch.nn.CrossEntropyLoss()
#Optimizer: mini-batch stochastic gradient descent
def sgd(params, lr, batch_size):
    for param in params:
        param.data -= lr * param.grad / batch_size
#Equivalently, the built-in optimizer can be used instead:
#loss = torch.nn.CrossEntropyLoss()
#optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
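Note that CrossEntropyLoss already averages over the batch by default (reduction='mean'), so the extra division by batch_size inside sgd is redundant here; this is why the learning rate in step 8 is set unusually large. A quick demonstration with dummy logits and labels:

logits = torch.randn(4, 10)
labels = torch.tensor([0, 1, 2, 3])
print(loss(logits, labels))                                             # mean over the batch
print(torch.nn.CrossEntropyLoss(reduction='sum')(logits, labels) / 4)   # same value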
6. Define the network model: a two-layer network in which the hidden layer's output passes through the activation function to the next layer
#Define the model. torch.matmul performs matrix multiplication and broadcasts
#batch dimensions when the two tensors' dimensions differ.
def net(X):
    X = X.view((-1, num_inputs))           # flatten each image into a 784-dim vector
    H = relu(torch.matmul(X, w1) + b1)     # hidden layer
    return torch.matmul(H, w2) + b2        # output logits
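A forward-pass shape check with a dummy batch (using the sizes defined above):

X = torch.randn(2, 1, 28, 28)
print(net(X).shape)   # torch.Size([2, 10])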
'''
#Concise way to build the same network
class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()
    def forward(self, x):
        return x.view(x.shape[0], -1)
net = nn.Sequential(
    FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    nn.Linear(num_hiddens, num_outputs),
)
for params in net.parameters():
    init.normal_(params, mean=0, std=0.01)
'''
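If you use this concise version, pass an optimizer instead of params and lr to the training function defined in step 8, along the lines of (kept commented out, matching the block above):

#optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
#train_softmax(net, train_iter, test_iter, loss, num_epochs, batch_size, optimizer=optimizer)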
7. Define the accuracy-evaluation function
#Evaluate the model's classification accuracy on a dataset; .item() converts a one-element Tensor to a Python number
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        #count the correctly classified examples
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        #y.shape[0] is the number of examples in this batch
        n += y.shape[0]
    return acc_sum / n
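Called on the untrained network, this should return roughly random accuracy (about 0.1 for 10 classes):

print(evaluate_accuracy(test_iter, net))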
8. Train the model
#Train the model.
#The loss function defined above already averages over the batch, and the sgd
#function divides by batch_size again (which is unnecessary here),
#so the learning rate is set unusually large to compensate.
num_epochs, lr = 5, 100
def train_softmax(net, train_iter, test_iter, loss, num_epochs, batch_size,
                  params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        #initialize running loss, number of correct predictions, total count
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            #zero the gradients, whether using the built-in optimizer or raw parameters
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            if optimizer is None:
                sgd(params, lr, batch_size)
            else:
                optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
#If you use the concise network, replace params and lr with optimizer
train_softmax(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)
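After training, you can inspect a few predictions with the label helper from step 2 (a quick sketch):

X, y = next(iter(test_iter))
print('pred:', get_fashion_mnist_labels(net(X).argmax(dim=1)[:5]))
print('true:', get_fashion_mnist_labels(y[:5]))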
Complete code
#Classify fashion_mnist with a multilayer perceptron
#Import libraries
import torch
import numpy as np
import sys
from torch import nn
from torch.nn import init
import torchvision
import torchvision.transforms as transforms
#Continue using the fashion_mnist dataset
mnist_train = torchvision.datasets.FashionMNIST(root='~/Desktop/OpenCV_demo/Datasets/FashionMNIST', train=True, download=False, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='~/Desktop/OpenCV_demo/Datasets/FashionMNIST', train=False, download=False, transform=transforms.ToTensor())
#The 10 classes in the dataset
def get_fashion_mnist_labels(labels):
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]
#Batch size and data loaders
batch_size = 256
if sys.platform.startswith('win'):
    num_workers = 0   # multi-process data loading can be problematic on Windows
else:
    num_workers = 4
train_iter = torch.utils.data.DataLoader(
    mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_iter = torch.utils.data.DataLoader(
    mnist_test, batch_size=batch_size, shuffle=True, num_workers=num_workers)
#Input size 28*28=784, output size 10 (classes), 256 hidden units: a two-layer network
num_inputs, num_outputs, num_hiddens = 784, 10, 256
w1 = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_hiddens)), dtype=torch.float)
b1 = torch.zeros(num_hiddens, dtype=torch.float)
w2 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_outputs)), dtype=torch.float)
b2 = torch.zeros(num_outputs, dtype=torch.float)
params = [w1, b1, w2, b2]
for param in params:
    param.requires_grad_(requires_grad=True)
#Define the ReLU activation function
def relu(X):
    return torch.max(input=X, other=torch.tensor(0.0))   # element-wise max(x, 0)
#Define the model. torch.matmul performs matrix multiplication and broadcasts
#batch dimensions when the two tensors' dimensions differ.
def net(X):
    X = X.view((-1, num_inputs))           # flatten each image into a 784-dim vector
    H = relu(torch.matmul(X, w1) + b1)     # hidden layer
    return torch.matmul(H, w2) + b2        # output logits
#Define the loss function
loss = torch.nn.CrossEntropyLoss()
#Optimizer: mini-batch stochastic gradient descent
def sgd(params, lr, batch_size):
    for param in params:
        param.data -= lr * param.grad / batch_size
#Evaluate the model's classification accuracy on a dataset; .item() converts a one-element Tensor to a Python number
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        #count the correctly classified examples
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        #y.shape[0] is the number of examples in this batch
        n += y.shape[0]
    return acc_sum / n
#Train the model.
#The loss function defined above already averages over the batch, and the sgd
#function divides by batch_size again (which is unnecessary here),
#so the learning rate is set unusually large to compensate.
num_epochs, lr = 5, 100
def train_softmax(net, train_iter, test_iter, loss, num_epochs, batch_size,
                  params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        #initialize running loss, number of correct predictions, total count
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            #zero the gradients, whether using the built-in optimizer or raw parameters
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            if optimizer is None:
                sgd(params, lr, batch_size)
            else:
                optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
train_softmax(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)