pytorch网络模型训练(自学总结)

网络模型训练


1. 模型的保存与读取

有两种方式保存model

1)法1

  • 保留了结构和参数,所需内存较大
# Method 1: save the whole model (structure + parameters; larger on disk)
vgg16 = torchvision.models.vgg16(pretrained=False)
torch.save(vgg16, './model1.pth')
# load (fix: path was ',/model1.pth' — a typo for './model1.pth')
model = torch.load('./model1.pth')
print(model)  # prints the architecture (fix: `model1` was undefined)

2)法2

  • 只保留了参数
# Method 2: save only the parameters (state_dict; smaller on disk)
torch.save(vgg16.state_dict(), 'model2.pth')
model = torch.load('model2.pth')  # fix: path was ',/model2.pth', must match the saved name
print(model)  # prints a parameter dict, not the architecture
# To recover the structure under method 2, rebuild the net first:
from xxx import vgg16
# xxx.py defines vgg16; import your own network class the same way
vgg16.load_state_dict(torch.load('model2.pth'))
print(vgg16)  # prints the architecture

2. 完整的模型训练

1) 所需库(根据需要自行添加)

import torch.optim
import torchvision.datasets
from torch.nn import Sequential
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import Conv2d, Dropout, Flatten, Linear, CrossEntropyLoss
import torch.nn.functional as F
#数据集准备
from torch.utils.tensorboard import SummaryWriter

2) 数据加载

# MNIST train/test splits as tensors; download=False assumes ./data already contains the files
train_data = torchvision.datasets.MNIST(root='data',train=True,transform=torchvision.transforms.ToTensor(),download=False)
test_data = torchvision.datasets.MNIST(root='data',train=False,transform=torchvision.transforms.ToTensor(),download=False)
# print(len(train_data))
# batches of 64, shuffled; drop_last discards the final short batch
train_load = DataLoader(train_data,batch_size=64,shuffle=True,num_workers=0,drop_last=True)
test_load = DataLoader(test_data,batch_size=64,shuffle=True,num_workers=0,drop_last=True)

3)网络搭建

class Symbol(nn.Module):
    """Small CNN for MNIST: two conv layers, dropout, two linear layers.

    Input: a batch of 1x28x28 images. Output: per-class log-probabilities
    over the 10 digit classes (log_softmax over dim 1).
    """

    def __init__(self):
        super(Symbol, self).__init__()
        # 1 input channel -> 32 -> 64 feature maps, 3x3 kernels, stride 1
        self.conv1 = Conv2d(1, 32, 3, 1)
        self.conv2 = Conv2d(32, 64, 3, 1)
        # dropout fights overfitting and improves generalisation
        self.dropout1 = Dropout(0.25)
        self.dropout2 = Dropout(0.5)
        # 64 channels * 12 * 12 spatial positions = 9216 features after pooling
        self.linear1 = Linear(9216, 128)
        self.linear2 = Linear(128, 10)

    def forward(self, x):
        """Return log-probabilities of shape (batch, 10) for input x."""
        features = F.relu(self.conv1(x))
        features = F.relu(self.conv2(features))   # ReLU encourages sparse activations
        features = F.max_pool2d(features, 2)      # 2x2 window, stride 2
        features = self.dropout1(features)
        flat = torch.flatten(features, 1)         # keep batch dim, flatten the rest
        hidden = self.dropout2(F.relu(self.linear1(flat)))
        logits = self.linear2(hidden)
        return F.log_softmax(logits, dim=1)

4) 初始化

*调用模型–>定义损失函数–>定义激活函数–>初始化参数

# model -> loss function -> optimizer -> bookkeeping counters
sym = Symbol()
# NOTE(fix): Symbol.forward already ends in log_softmax, so the matching loss
# is NLLLoss. CrossEntropyLoss applies log_softmax internally and would
# double-apply it; the call signature loss(output, target) is unchanged.
loss = nn.NLLLoss()
optim = torch.optim.SGD(sym.parameters(), lr=1e-3)
total_acc = 0    # running count of correct test predictions
train_step = 0   # global training-batch counter
test_step = 0    # per-epoch counter for test-side TensorBoard scalars
epochs = 30
writer = SummaryWriter()  # TensorBoard logger

5) 训练

模型 –> 经过loss function –> gradient置0 –> 反向传播 –> tensorboard加载

  • sym.train()
    sym.eval()
# One pass over the training set per epoch; log the loss every 100 batches.
for i in range(epochs):
    print("-------the {} epoch-------".format(i+1))
    sym.train()     # required when the net has mode-dependent layers (e.g. Dropout); harmless otherwise
    for data in train_load:
        imgs, targets = data
        output = sym(imgs)
        loss_result = loss(output,targets)
        optim.zero_grad()
        loss_result.backward()
        optim.step()
        if train_step % 100 ==0:
            print("the loss of {} step is {}".format(train_step, loss_result))
            writer.add_scalar('train_loss',loss_result,train_step)
        train_step += 1
    sym.eval()          # same rationale as train(): switch mode-dependent layers to eval behaviour

6) 测试

    total_test_loss = 0
    with torch.no_grad():   #只需要测试,不需要调整梯度
        for data in test_load:
            imgs,targets = data
            outputs = sym(imgs)
            loss_test = loss(outputs,targets)
            total_test_loss+=loss_test
            acc = (outputs.argmax(1)==targets).sum()
            total_acc+=acc
    print('the acc is {}'.format(total_acc))
    print('the test loss is {}'.format(total_test_loss/len(test_data)))
    writer.add_scalar('test_loss',total_test_loss,test_step)
    writer.add_scalar('acc',total_test_loss/len(test_data),test_step)
    test_step = test_step + 1

    torch.save(sym,'model_{}.pth'.format(i))
    #torch.save(sym.state_dict(),'model{}'.format(i))  #两种保存方式
    print('save')
writer.close()

3. 判断模型分类的正确率

Argmax

  • argmax(0):列概率最大的类别index
  • argmax(1):行概率最大的类别index
    多分类的时候,经过网络的输出是一个矩阵,每行是一个样本在所有类别上的概率组成的矢量,因此用argmax(1)找到一行中找到最大概率对应的index,其对应的类为网络最终判别的类
import torch
# Demo: judge classification correctness with argmax.
# Each row of `output` is one sample's scores over the 3 classes.
output = torch.tensor([[0.1, 0.4, 0.6],
                       [0.4, 0.5, 0.1],
                       [0.9, 0.6, 0.3]])
# argmax(dim=1) picks the highest-scoring class per row (per sample)
predic = output.argmax(dim=1)
print(predic)
# print(output.argmax(dim=0))  # per-column maximum index instead
target = torch.tensor([2, 2, 0])   # ground-truth labels
# element-wise comparison against the labels
print(predic == target)
# number of positions where prediction and label agree
print((predic == target).sum())

4. GPU训练

1)法1

...
sym = Symbol()
sym = sym.cuda()
loss = CrossEntropyLoss()
loss = loss.cuda()
...
imgs = imgs.cuda()
...
target = target.cuda()
...
# model and loss need not be re-assigned (Module.cuda() moves in place);
# imgs and target MUST be re-assigned (Tensor.cuda() returns a new tensor)

或者

...
sym = Symbol()
# fix: the availability check lives on torch.cuda (was `sym.cuda.is_avaliable()`),
# and the moved model must be bound back to the same name (was `model = model.cuda()`)
if torch.cuda.is_available():
    sym = sym.cuda()
loss = CrossEntropyLoss()
loss.cuda()
...
...
  • 计时
import time
...
start = time.time()  # start of the timed region
...
end = time.time()    # end of the timed region
# fix: was `time = end-start`, which shadows the `time` module
elapsed = end - start  # duration in seconds

2)法2

程序开始就定义torch.device("cpu")或torch.device("cuda"),后面model,loss,imgs,target调用.to(device)

# fix: `devide` typo, `torch,device` comma typo, `is_avaliable` misspelling,
# and device strings must be lowercase ("cpu"/"cuda:0"), not "CPU"/"CUDA:0"
device = torch.device("cpu")   # or torch.device("cuda:0")
# or: pick the GPU automatically when one is present
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

5.测试训练好的model

import torch
import torchvision.transforms
from main import Symbol   # the class must be importable so torch.load can unpickle the model
from PIL import Image

img_path = '../xxx.png'
image = Image.open(img_path)
image = image.convert('RGB')  # PNG has 4 channels (RGB + alpha); drop the alpha channel
# fix: Resize is a class and must be CALLED with a (h, w) size;
# `Resize[28,28]` indexed the class and raised TypeError
transform = torchvision.transforms.Compose([torchvision.transforms.Resize((28, 28)),
                                            torchvision.transforms.ToTensor()])
image = transform(image)
model = torch.load('xxx.pth')
# NOTE(review): you may need torch.reshape(image, (1, 3, 28, 28)) to add the
# batch dimension -- confirm the channel count matches the trained network's input
model.eval()
with torch.no_grad():
    output = model(image)
print(output)

你可能感兴趣的:(pytorch,深度学习,python,神经网络)