LeNet Neural Network in PyTorch: Implementation, Counting Model Parameters, Saving and Loading Models

The LeNet network architecture:
[Figure 1: LeNet network architecture diagram]
This experiment uses the FashionMNIST dataset, whose images are 28x28, so the input to this LeNet model is 28x28.
Code implementation:

import os
import torch
import torch.nn as nn
import sys
import time
import d2lzh_pytorch as d2l

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(torch.__version__)
print(device)
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 6, 5), # in_channels, out_channels, kernel_size
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # kernel_size, stride
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.fc = nn.Sequential(
            nn.Linear(16*4*4, 120), # 16 channels x 4 x 4 spatial size after the conv stack
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10)
        )

    def forward(self, img):
        feature = self.conv(img)
        output = self.fc(feature.view(img.shape[0], -1))
        return output
net = LeNet()
print(net)
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)

def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval() # evaluation mode; this disables dropout
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train() # switch back to training mode
            else: # custom model (not used after Section 3.13; GPU not considered)
                if('is_training' in net.__code__.co_varnames): # the function takes an is_training argument
                    # call it with is_training=False
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n
lr, num_epochs = 0.001, 10
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
# train
net = net.to(device)
print("training on ", device)
loss = torch.nn.CrossEntropyLoss()
total_time = 0
for epoch in range(num_epochs):
    # reset per-epoch counters, including batch_count, so the printed
    # loss is the average over this epoch's batches only
    train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
    for X, y in train_iter:
        X = X.to(device)
        y = y.to(device)
        y_hat = net(X)
        l = loss(y_hat, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        train_l_sum += l.cpu().item()
        train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
        n += y.shape[0]
        batch_count += 1
    test_acc = evaluate_accuracy(test_iter, net)
    total_time += round(time.time()-start,2)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
            % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))

print('total time:%.2f sec' % total_time)
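The listing above uses the d2lzh_pytorch helper package only for the data loaders. If that package is not available, an equivalent pair of loaders can be built directly on torchvision; a sketch, with an illustrative dataset directory:

# Sketch: FashionMNIST loaders built on torchvision instead of d2lzh_pytorch.
# The root directory below is illustrative.
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

def load_data_fashion_mnist(batch_size, root='~/Datasets/FashionMNIST'):
    transform = transforms.ToTensor() # 28x28 PIL images -> 1x28x28 float tensors in [0, 1]
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True,
                                                    download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False,
                                                   download=True, transform=transform)
    train_iter = DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
    test_iter = DataLoader(mnist_test, batch_size=batch_size, shuffle=False)
    return train_iter, test_iter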

Analysis:
Max pooling only changes the spatial size of a feature map, not its number of channels, so it introduces no parameters; only the convolutional and fully connected layers contribute to the parameter count. For a convolutional layer, the parameter count is kernel_height × kernel_width × in_channels × out_channels + out_channels (one bias per output channel). For a fully connected layer, it is in_features × out_features + out_features (one bias per output unit). With a 28×28 input, the spatial size shrinks as 28 → 24 → 12 → 8 → 4 through the two convolutions and two poolings, so the first fully connected layer sees 16 × 4 × 4 = 256 flattened features. The total is therefore:

total_params = (5×5×1×6 + 6) + (5×5×6×16 + 16) + (256×120 + 120) + (120×84 + 84) + (84×10 + 10)
             = 156 + 2416 + 30840 + 10164 + 850
             = 44426
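As a sanity check, both the flattened feature size and the total can be verified directly from the LeNet class defined above:

# Verify the flattened feature size with a dummy forward pass,
# then count the parameters; the total should print 44426.
model = LeNet() # a fresh CPU copy, just for inspection
dummy = torch.zeros(1, 1, 28, 28) # one fake FashionMNIST image
print(model.conv(dummy).shape)    # torch.Size([1, 16, 4, 4])
for name, p in model.named_parameters():
    print(name, tuple(p.shape), p.numel())
print('total:', sum(p.numel() for p in model.parameters()))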

Saving the model parameters:

(Only the training loop changes; num_epochs is set to 200.)

for epoch in range(num_epochs):
    train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
    for X, y in train_iter:
        X = X.to(device)
        y = y.to(device)
        y_hat = net(X)
        l = loss(y_hat, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        train_l_sum += l.cpu().item()
        train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
        n += y.shape[0]
        batch_count += 1
    test_acc = evaluate_accuracy(test_iter, net)
    total_time += round(time.time()-start,2)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
            % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))
    if (epoch + 1) % 200 == 0:
        print('------------ Saving model ------------')
        torch.save(net.state_dict(),'/home3/code/model_num_epoch-' + str(epoch + 1)+ '.pytorch')
        print('------------ Done ------------')
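To resume training exactly where it stopped, the optimizer state and the epoch index are usually saved alongside the weights. A minimal sketch that would replace the torch.save call above (the checkpoint file name is illustrative):

# Sketch: bundle weights, optimizer state, and epoch into one checkpoint
# so that training can be resumed exactly. The file name is illustrative.
checkpoint = {
    'epoch': epoch + 1,
    'model_state_dict': net.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
}
torch.save(checkpoint, '/home3/code/checkpoint-%d.pytorch' % (epoch + 1))

When resuming, restore both parts: net.load_state_dict(checkpoint['model_state_dict']) and optimizer.load_state_dict(checkpoint['optimizer_state_dict']).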

Loading the model to continue training:

# load the saved weights once, before training continues
# (loading inside the batch loop would undo every optimizer step)
net.load_state_dict(torch.load('/home3/code/model_num_epoch-200.pytorch'))
for epoch in range(num_epochs):
    train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
    for X, y in train_iter:
        X = X.to(device)
        y = y.to(device)
        y_hat = net(X)
        l = loss(y_hat, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        train_l_sum += l.cpu().item()
        train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
        n += y.shape[0]
        batch_count += 1
    test_acc = evaluate_accuracy(test_iter, net)
    total_time += round(time.time() - start, 2)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
            % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))
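If the saved weights are only needed for evaluation, load them once into a fresh model and switch to eval mode; a minimal sketch (reusing LeNet, device, test_iter, and evaluate_accuracy from above):

# Sketch: load the saved weights for inference only.
net = LeNet()
net.load_state_dict(torch.load('/home3/code/model_num_epoch-200.pytorch',
                               map_location=device))
net = net.to(device)
net.eval() # evaluation mode is good practice even though this LeNet has no dropout
print('test acc: %.3f' % evaluate_accuracy(test_iter, net))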

Output:
training on cuda
epoch 1, loss 0.2061, train acc 0.921, test acc 0.880, time 3.4 sec
epoch 2, loss 0.1027, train acc 0.923, test acc 0.881, time 3.0 sec
epoch 3, loss 0.0683, train acc 0.922, test acc 0.878, time 2.9 sec
epoch 4, loss 0.0515, train acc 0.922, test acc 0.879, time 3.0 sec
epoch 5, loss 0.0413, train acc 0.921, test acc 0.880, time 3.0 sec
epoch 6, loss 0.0344, train acc 0.922, test acc 0.878, time 2.9 sec
epoch 7, loss 0.0294, train acc 0.922, test acc 0.880, time 3.0 sec
epoch 8, loss 0.0257, train acc 0.922, test acc 0.879, time 3.1 sec
epoch 9, loss 0.0229, train acc 0.922, test acc 0.881, time 3.1 sec
epoch 10, loss 0.0206, train acc 0.922, test acc 0.880, time 3.0 sec
total time:30.41 sec
