【PyTorch】基于PyTorch的多层感知机的实现

多层感知机
多层感知机(MLP)是在单层神经网络的基础上加入隐藏层得到的模型。简单的线性模型就是单层神经网络,而多层感知机是指层数大于一层的神经网络。在代码上,它与线性模型的区别在于构建网络模型时多添加了一层隐藏层(隐藏层的输出要经过激活函数),其余步骤类似。

代码部分

1.导入库

#使用多层感知机对fashion_mnist分类
#导入库
import torch 
import numpy as np 
import sys
from torch import nn
from torch.nn import init
import torchvision
import torchvision.transforms as transforms

2.导入fashion_mnist数据集及类别标签。由于我已经下载了这个数据集,所以将download设置为False;如果尚未下载,则设置为True。

# Continue using the Fashion-MNIST dataset: load the training and test splits
# from the local root directory (download=False), converting images with ToTensor.
mnist_train = torchvision.datasets.FashionMNIST(
    root='~/Desktop/OpenCV_demo/Datasets/FashionMNIST',
    train=True,
    download=False,
    transform=transforms.ToTensor(),
)
mnist_test = torchvision.datasets.FashionMNIST(
    root='~/Desktop/OpenCV_demo/Datasets/FashionMNIST',
    train=False,
    download=False,
    transform=transforms.ToTensor(),
)

# The 10 classes of the dataset
def get_fashion_mnist_labels(labels):
    """Translate numeric class indices into their text labels.

    Every element of `labels` is converted with int() and used as an index
    into the list of 10 class names; the names are returned as a list in
    the same order as the input.
    """
    class_names = [
        't-shirt', 'trouser', 'pullover', 'dress', 'coat',
        'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot',
    ]
    return [class_names[idx] for idx in map(int, labels)]

3.设置batch_size

# Mini-batch size and data loaders
batch_size = 256
# No loader worker processes on Windows; 4 workers on other platforms.
num_workers = 0 if sys.platform.startswith('win') else 4
train_iter = torch.utils.data.DataLoader(
    mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
# Accuracy does not depend on sample order, so the test set is read
# sequentially instead of being reshuffled on every evaluation pass.
test_iter = torch.utils.data.DataLoader(
    mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)
  

4.初始化参数 w1,w2,b1,b2,此时添加了一层隐藏层

# Input size 28*28 = 784, 10 output classes, 256 hidden units: a two-layer network
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# Weights are sampled from a normal distribution (mean 0, std 0.01) via NumPy;
# biases start at zero. Everything is stored as float32.
w1 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_inputs, num_hiddens)),
    dtype=torch.float32,
)
b1 = torch.zeros(num_hiddens, dtype=torch.float32)
w2 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_hiddens, num_outputs)),
    dtype=torch.float32,
)
b2 = torch.zeros(num_outputs, dtype=torch.float32)

# Turn on gradient tracking for every parameter.
params = [w1, b1, w2, b2]
for param in params:
    param.requires_grad_(True)

5.定义激活函数,损失函数,优化函数

# ReLU activation function
def relu(X):
    """Return the element-wise maximum of `X` and a scalar zero tensor."""
    zero = torch.tensor(0.0)
    return torch.max(X, zero)
# Loss function: cross-entropy, built with the constructor's default arguments
loss = nn.CrossEntropyLoss()

# Optimization algorithm: mini-batch stochastic gradient descent
def sgd(params, lr, batch_size):
    """Apply one in-place mini-batch SGD step to every tensor in `params`.

    Each parameter is updated as ``param -= lr * param.grad / batch_size``.
    Parameters whose ``.grad`` is None are left untouched.

    Args:
        params: iterable of tensors to update.
        lr: learning rate.
        batch_size: divisor applied to the gradient.
    """
    # Update under no_grad rather than through `.data`: the step is not
    # recorded by autograd, but the in-place change stays visible to it.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                continue
            param -= lr * param.grad / batch_size
#也可以由一句话代替
#loss = torch.nn.CrossEntropyLoss()
#optimizer = torch.optim.SGD(net.parameters(), lr=0.5)


6.定义网络模型 双层网络,经过激活函数传递到下一层

# Model definition. torch.matmul (the @ operator) broadcasts its operands
# when their dimensions differ before multiplying.
def net(X):
    """Forward pass of the two-layer perceptron.

    `X` is viewed as rows of `num_inputs` values, sent through the hidden
    layer (w1, b1) followed by relu, then through the output layer (w2, b2).
    """
    flat = X.view(-1, num_inputs)
    hidden = relu(flat @ w1 + b1)
    return hidden @ w2 + b2

'''
#网络模型的简洁搭建方法
class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer,self).__init__()
    def forward(self,x):
        return x.view(x.shape[0],-1)
net = nn.Sequential(
        FlattenLayer(),
        nn.Linear(num_inputs, num_hiddens),
        nn.ReLU(),
        nn.Linear(num_hiddens, num_outputs), 
        )

for params in net.parameters():
    init.normal_(params, mean=0, std=0.01)
'''

7.定义评估模型准确率的函数

# Evaluate the model's accuracy on a dataset; .item() converts a Tensor to a Python number
def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in `data_iter` that `net` predicts correctly.

    Args:
        data_iter: iterable of (X, y) batches; the prediction for each row is
            the argmax over dim 1 of ``net(X)`` and is compared with y.
        net: callable mapping a batch X to per-class scores.

    Returns:
        The accuracy as a float, or 0.0 when `data_iter` yields no samples.
    """
    # An nn.Module that is in training mode is switched to eval mode for the
    # measurement and switched back afterwards; plain functions are untouched.
    was_training = isinstance(net, torch.nn.Module) and net.training
    if was_training:
        net.eval()
    acc_sum, n = 0.0, 0
    try:
        # No gradients are needed here, so skip building the autograd graph.
        with torch.no_grad():
            for X, y in data_iter:
                # Count the correctly predicted samples of this batch
                acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
                # The first dimension of y is the number of samples in the batch
                n += y.shape[0]
    finally:
        if was_training:
            net.train()
    return acc_sum / n if n else 0.0

8.训练模型

# Training hyper-parameters.
# The loss function defined in this file already divides by batch_size, and
# sgd() divides by batch_size once more (which is not needed), so the
# learning rate is set larger to compensate.
num_epochs = 5
lr = 100

def train_softmax(net,train_iter,test_iter,loss,num_epochs,batch_size,
                  params=None, lr=None, optimizer=None):
    """Train `net` and print loss/accuracy statistics after every epoch.

    Args:
        net: callable mapping a batch X to per-class scores.
        train_iter: iterable of (X, y) training batches.
        test_iter: iterable of (X, y) batches passed to evaluate_accuracy().
        loss: callable ``loss(y_hat, y)``; its result is summed to a scalar.
        num_epochs: number of passes over `train_iter`.
        batch_size: forwarded to sgd() when no optimizer is given.
        params: tensors updated by sgd(); used only when `optimizer` is None.
        lr: learning rate for sgd(); used only when `optimizer` is None.
        optimizer: optional optimizer object; when given, its zero_grad() and
            step() replace the manual gradient reset and the sgd() update.
    """
    for epoch in range(num_epochs):
        # Running totals for this epoch: loss, correct predictions, samples seen
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0

        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # Clear gradients left over from the previous batch. Every
            # parameter is checked on its own, so a parameter without a
            # gradient is skipped instead of relying on params[0] alone.
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None:
                for param in params:
                    if param.grad is not None:
                        param.grad.zero_()

            l.backward()
            if optimizer is None:
                sgd(params, lr, batch_size)
            else:
                optimizer.step()

            # NOTE(review): l is accumulated as-is; if `loss` already averages
            # over the batch, train_l_sum / n is smaller than the per-sample
            # mean loss by roughly a factor of the batch size.
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]

        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f,test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
# When the concise network is used instead, pass an optimizer in place of params and lr
train_softmax(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=params, lr=lr)

完整代码

#使用多层感知机对fashion_mnist分类
#导入库
import torch 
import numpy as np 
import sys
from torch import nn
from torch.nn import init
import torchvision
import torchvision.transforms as transforms

# Continue using the Fashion-MNIST dataset: load the training and test splits
# from the local root directory (download=False), converting images with ToTensor.
mnist_train = torchvision.datasets.FashionMNIST(
    root='~/Desktop/OpenCV_demo/Datasets/FashionMNIST',
    train=True,
    download=False,
    transform=transforms.ToTensor(),
)
mnist_test = torchvision.datasets.FashionMNIST(
    root='~/Desktop/OpenCV_demo/Datasets/FashionMNIST',
    train=False,
    download=False,
    transform=transforms.ToTensor(),
)

# The 10 classes of the dataset
def get_fashion_mnist_labels(labels):
    """Translate numeric class indices into their text labels.

    Every element of `labels` is converted with int() and used as an index
    into the list of 10 class names; the names are returned as a list in
    the same order as the input.
    """
    class_names = [
        't-shirt', 'trouser', 'pullover', 'dress', 'coat',
        'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot',
    ]
    return [class_names[idx] for idx in map(int, labels)]

# Mini-batch size and data loaders
batch_size = 256
# No loader worker processes on Windows; 4 workers on other platforms.
num_workers = 0 if sys.platform.startswith('win') else 4
train_iter = torch.utils.data.DataLoader(
    mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
# Accuracy does not depend on sample order, so the test set is read
# sequentially instead of being reshuffled on every evaluation pass.
test_iter = torch.utils.data.DataLoader(
    mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)
  
# Input size 28*28 = 784, 10 output classes, 256 hidden units: a two-layer network
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# Weights are sampled from a normal distribution (mean 0, std 0.01) via NumPy;
# biases start at zero. Everything is stored as float32.
w1 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_inputs, num_hiddens)),
    dtype=torch.float32,
)
b1 = torch.zeros(num_hiddens, dtype=torch.float32)
w2 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_hiddens, num_outputs)),
    dtype=torch.float32,
)
b2 = torch.zeros(num_outputs, dtype=torch.float32)

# Turn on gradient tracking for every parameter.
params = [w1, b1, w2, b2]
for param in params:
    param.requires_grad_(True)
# ReLU activation function
def relu(X):
    """Return the element-wise maximum of `X` and a scalar zero tensor."""
    zero = torch.tensor(0.0)
    return torch.max(X, zero)

# Model definition. torch.matmul (the @ operator) broadcasts its operands
# when their dimensions differ before multiplying.
def net(X):
    """Forward pass of the two-layer perceptron.

    `X` is viewed as rows of `num_inputs` values, sent through the hidden
    layer (w1, b1) followed by relu, then through the output layer (w2, b2).
    """
    flat = X.view(-1, num_inputs)
    hidden = relu(flat @ w1 + b1)
    return hidden @ w2 + b2

# Loss function: cross-entropy, built with the constructor's default arguments
loss = nn.CrossEntropyLoss()

# Optimization algorithm: mini-batch stochastic gradient descent
def sgd(params, lr, batch_size):
    """Apply one in-place mini-batch SGD step to every tensor in `params`.

    Each parameter is updated as ``param -= lr * param.grad / batch_size``.
    Parameters whose ``.grad`` is None are left untouched.

    Args:
        params: iterable of tensors to update.
        lr: learning rate.
        batch_size: divisor applied to the gradient.
    """
    # Update under no_grad rather than through `.data`: the step is not
    # recorded by autograd, but the in-place change stays visible to it.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                continue
            param -= lr * param.grad / batch_size

# Evaluate the model's accuracy on a dataset; .item() converts a Tensor to a Python number
def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in `data_iter` that `net` predicts correctly.

    Args:
        data_iter: iterable of (X, y) batches; the prediction for each row is
            the argmax over dim 1 of ``net(X)`` and is compared with y.
        net: callable mapping a batch X to per-class scores.

    Returns:
        The accuracy as a float, or 0.0 when `data_iter` yields no samples.
    """
    # An nn.Module that is in training mode is switched to eval mode for the
    # measurement and switched back afterwards; plain functions are untouched.
    was_training = isinstance(net, torch.nn.Module) and net.training
    if was_training:
        net.eval()
    acc_sum, n = 0.0, 0
    try:
        # No gradients are needed here, so skip building the autograd graph.
        with torch.no_grad():
            for X, y in data_iter:
                # Count the correctly predicted samples of this batch
                acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
                # The first dimension of y is the number of samples in the batch
                n += y.shape[0]
    finally:
        if was_training:
            net.train()
    return acc_sum / n if n else 0.0

# Training hyper-parameters.
# The loss function defined in this file already divides by batch_size, and
# sgd() divides by batch_size once more (which is not needed), so the
# learning rate is set larger to compensate.
num_epochs = 5
lr = 100

def train_softmax(net,train_iter,test_iter,loss,num_epochs,batch_size,
                  params=None, lr=None, optimizer=None):
    """Train `net` and print loss/accuracy statistics after every epoch.

    Args:
        net: callable mapping a batch X to per-class scores.
        train_iter: iterable of (X, y) training batches.
        test_iter: iterable of (X, y) batches passed to evaluate_accuracy().
        loss: callable ``loss(y_hat, y)``; its result is summed to a scalar.
        num_epochs: number of passes over `train_iter`.
        batch_size: forwarded to sgd() when no optimizer is given.
        params: tensors updated by sgd(); used only when `optimizer` is None.
        lr: learning rate for sgd(); used only when `optimizer` is None.
        optimizer: optional optimizer object; when given, its zero_grad() and
            step() replace the manual gradient reset and the sgd() update.
    """
    for epoch in range(num_epochs):
        # Running totals for this epoch: loss, correct predictions, samples seen
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0

        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # Clear gradients left over from the previous batch. Every
            # parameter is checked on its own, so a parameter without a
            # gradient is skipped instead of relying on params[0] alone.
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None:
                for param in params:
                    if param.grad is not None:
                        param.grad.zero_()

            l.backward()
            if optimizer is None:
                sgd(params, lr, batch_size)
            else:
                optimizer.step()

            # NOTE(review): l is accumulated as-is; if `loss` already averages
            # over the batch, train_l_sum / n is smaller than the per-sample
            # mean loss by roughly a factor of the batch size.
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]

        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f,test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
        
# Run training with the hand-written parameters and the manual sgd() update
train_softmax(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=params, lr=lr)


你可能感兴趣的:(【PyTorch】基于PyTorch的多层感知机的实现)