Pytorch半精度训练(以两层BP网络为例) 与正常训练对比及loss可视化

Pytorch半精度训练,只需要修改以下内容:
Variables:

x,y = x.cuda().half(),y.cuda().half()

model:

model.cuda().half()

以两层累积BP网络为例,数据采用西瓜数据集3.0,分别使用全精度训练和半精度训练,发现在本文中网络很小的情况下,二者的loss曲线几乎是一样的(见下图),二者的测试结果也是一样的。

另外,半精度训练时若使用Adam优化器,需要显式设置较大的eps参数(如下面的eps=1e-3),否则loss会变成NaN:Adam默认的eps=1e-8小于float16能表示的最小正数(约6e-8),在半精度下会下溢为0,参数更新时分母可能为0。
使用SGD优化器则没有这个问题。

optimizer = optim.Adam(self.model.parameters(), lr=0.8,eps=1e-3)

Adam出现NaN的问题在这个博客中有具体提到。

图1:全精度(full)与半精度(half)训练的loss曲线对比

完整代码

# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
# Seed the torch (CPU and CUDA), NumPy and Python RNGs with one fixed value.
seed = 2019
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
import random
np.random.seed(seed)  # Numpy module.
random.seed(seed)  # Python random module.
torch.manual_seed(seed)  # repeats the earlier torch.manual_seed call with the same seed

import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei'] # select the SimHei font so Chinese labels render
plt.rcParams['axes.unicode_minus'] = False # make the minus sign render correctly
plt.close('all')  # close every figure that is currently open

def preprocess(data, label_column='好瓜'):
    """Encode categorical columns and standardise the feature columns.

    Every column whose dtype is ``object`` (the label column included) is
    replaced by the integer codes of a fresh ``LabelEncoder``. All columns
    except ``label_column`` are then scaled with ``StandardScaler``.

    The work is done on a copy, so the caller's DataFrame is not modified.

    Args:
        data: pandas DataFrame containing the feature columns and the label
            column.
        label_column: name of the target column. Defaults to ``'好瓜'``.

    Returns:
        A tuple ``(x, y)`` where ``x`` is the 2-D array of standardised
        features and ``y`` is the label array reshaped to ``(n_rows, 1)``.
    """
    # Copy first: the encoding below assigns columns, which would otherwise
    # overwrite the caller's original (string) values.
    data = data.copy()

    # Map every string-valued column to integer codes.
    for title in data.columns:
        if data[title].dtype == 'object':
            data[title] = LabelEncoder().fit_transform(data[title])

    # Standardise the features; the label column is left as encoded.
    features = data.drop(label_column, axis=1)
    labels = data[label_column]
    x = np.array(StandardScaler().fit_transform(features))
    y = np.array(labels).reshape(labels.shape[0], 1)
    return x, y

class BP_pytorch():
    """Train and evaluate a ``BPnet`` through sklearn-like ``fit``/``predict``.

    Args:
        n_samples: number of rows in the data; forwarded to ``BPnet``.
        in_features: number of input features; forwarded to ``BPnet``.
        dim: hidden-layer width; forwarded to ``BPnet``.
        max_iter: number of full-batch optimisation steps run by ``fit``.
        use_cuda: when true, data and model are moved to the GPU.
        half: when true, data and model are moved to the GPU *and* converted
            to float16 (this happens regardless of ``use_cuda``).
    """

    def __init__(self,n_samples,in_features,dim,max_iter=1000,use_cuda=torch.cuda.is_available(),half=False):
        self.max_iter = max_iter
        self.use_cuda = use_cuda
        self.half = half
        self.model = BPnet(n_samples,in_features,dim)

    def _to_device(self, *tensors):
        """Move the model and ``tensors`` to the configured device/precision.

        Returns the converted tensors as a tuple, in the order given.
        """
        if self.half:
            # Half precision always runs on the GPU here.
            self.model.cuda().half()
            return tuple(t.cuda().half() for t in tensors)
        if self.use_cuda:
            self.model.cuda()
            return tuple(t.cuda() for t in tensors)
        return tensors

    def fit(self,x,y):
        """Train the model on NumPy arrays ``x`` and ``y`` and plot the loss.

        Runs ``max_iter`` full-batch Adam steps on the MSE loss, prints the
        loss of every step, saves one checkpoint to ``self.model.name()``
        once training is finished, and draws the loss curve on the current
        matplotlib figure (labelled ``'half'`` or ``'full'``).
        """
        # NumPy -> torch.float32. The data are constants, so they do not need
        # gradients; only the model parameters are optimised.
        x = torch.from_numpy(x.astype(np.float32))
        y = torch.from_numpy(y.astype(np.float32))
        x, y = self._to_device(x, y)
        if self.half:
            print('half training is used')
            print(self.model)
        if self.use_cuda:
            print('cuda is used')
            print(self.model)

        criterion = nn.MSELoss()
        # eps is raised from Adam's default because the default is too small
        # for float16 and leads to NaN losses in half precision.
        optimizer = optim.Adam(self.model.parameters(), lr=0.8,eps=1e-3)

        losslist = []
        for ite in range(self.max_iter):
            optimizer.zero_grad()
            out2 = self.model(x)
            loss = criterion(out2, y)  # (input, target) order
            # Keep a plain Python float: storing the tensor would keep each
            # step's autograd graph alive and hand GPU tensors to matplotlib.
            loss_value = loss.item()
            losslist.append(loss_value)
            print('iter:%d  loss:%.4f'%(ite,loss_value))
            loss.backward()
            optimizer.step()

        # Only the last checkpoint is ever read back (the path is fixed), so
        # write it once instead of after every step.
        state = {'model': self.model.state_dict(), 'epoch': self.max_iter - 1}
        torch.save(state, self.model.name())

        xx = [i+1 for i in range(self.max_iter)]
        name = 'half' if self.half else 'full'
        plt.plot(xx,losslist,label=name)
        plt.title('Loss Curve')
        plt.xlabel('iteration')
        plt.ylabel('loss')

    def predict(self,x):
        """Return the rounded model outputs for the NumPy array ``x``.

        The weights are reloaded from the checkpoint at ``self.model.name()``
        before the forward pass.
        """
        # Load onto the CPU first; load_state_dict then copies the values
        # into the parameters on whatever device the model currently uses.
        checkpoint = torch.load(self.model.name(), map_location='cpu')
        self.model.load_state_dict(checkpoint['model'])

        x = torch.from_numpy(x.astype(np.float32))
        (x,) = self._to_device(x)

        with torch.no_grad():
            out2 = self.model(x)
        y_pred = np.round(out2.cpu().numpy())
        return y_pred
        
            
##pytorch搭建网络
class BPnet(nn.Module):
    """Two-layer sigmoid network built from explicit weight and bias tensors.

    Parameters are registered in the order ``w1``, ``b1``, ``w2``, ``b2`` and
    all start at zero. The biases have shapes ``(n_samples, dim)`` and
    ``(n_samples, 1)`` and are added directly to the matrix products, so the
    input's row count has to be compatible with ``n_samples``.
    """

    def __init__(self,n_samples,in_features,dim):
        super(BPnet,self).__init__()
        parameter_shapes = (
            ('w1', (in_features, dim)),
            ('b1', (n_samples, dim)),
            ('w2', (dim, 1)),
            ('b2', (n_samples, 1)),
        )
        # Assigning an nn.Parameter as an attribute registers it on the module.
        for attr, shape in parameter_shapes:
            setattr(self, attr, nn.Parameter(torch.zeros(*shape)))
        self.sigmoid = nn.Sigmoid()

    def forward(self,x):
        """Return ``sigmoid(sigmoid(x·w1 + b1)·w2 + b2)`` for a 2-D ``x``."""
        hidden = self.sigmoid(x.mm(self.w1) + self.b1)
        return self.sigmoid(hidden.mm(self.w2) + self.b2)

    def name(self):
        """Return the fixed identifier ``'BPnet'``."""
        return 'BPnet'

def main():
    """Train the network in full and in half precision and compare them.

    Reads ``watermelon30.txt``, trains one model per precision on the same
    data, prints predictions next to the labels, shows both loss curves in
    one figure and writes the full-precision predictions to
    ``result_pytorch.xlsx``.
    """
    frame = pd.read_table('watermelon30.txt',delimiter=',')
    frame.drop('编号',axis=1,inplace=True)
    x,y = preprocess(frame)
    n_rows = x.shape[0]
    n_cols = x.shape[1]
    plt.figure()

    # Full-precision run.
    full_model = BP_pytorch(n_samples=n_rows,in_features=n_cols,dim=10,half=False)
    full_model.fit(x,y)
    full_pred = full_model.predict(x)
    print(np.hstack((full_pred,y)))

    # Half-precision run.
    half_model = BP_pytorch(n_samples=n_rows,in_features=n_cols,dim=10,half=True)
    half_model.fit(x,y)
    print(half_model)
    half_pred = half_model.predict(x)
    print(np.hstack((half_pred,y)))

    # Both fit() calls drew on the same figure; add the legend and display it.
    plt.legend(loc='upper right')
    plt.show()

    # Only the full-precision predictions are exported.
    table = np.hstack((y,full_pred))
    result = pd.DataFrame(table,columns=['真值','预测'])
    result.to_excel('result_pytorch.xlsx',index=False)


if __name__ == "__main__":
    main()

你可能感兴趣的:(深度学习)