NLP pytorch 模型训练过程

通过学习 https://www.bilibili.com/video/BV16S4y1P7PN ,完成笔记记录

# 引入必要的包
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as Data
# Train on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

一、数据集构建

1.encode

定义函数:输入一个数字,如果能被15整除返回3,能被5整除返回2,能被3整除返回1,否则返回0(解码时0对应数字本身)

def fizzbuzz_encode(number):
    """Map a number to its FizzBuzz class index.

    Args:
        number: The integer to classify.

    Returns:
        3 if the number is divisible by 15 ('fizzbuzz'), 2 if divisible by 5
        ('buzz'), 1 if divisible by 3 ('fizz'), and 0 otherwise (the number
        itself is printed).
    """
    divisible_by_three = number % 3 == 0
    divisible_by_five = number % 5 == 0

    # Divisible by both 3 and 5 is the same as divisible by 15.
    if divisible_by_three and divisible_by_five:
        return 3  # 'fizzbuzz'
    if divisible_by_five:
        return 2  # 'buzz'
    if divisible_by_three:
        return 1  # 'fizz'
    return 0  # str(number)
    

2.decode

通过0 1 2 3返回对应的label

def fizzbuzz_decode(number, label):
    """Turn a class index back into the text FizzBuzz would print.

    Args:
        number: The integer that was classified.
        label: Class index, one of 0, 1, 2 or 3.

    Returns:
        ``str(number)`` for label 0, 'fizz' for 1, 'buzz' for 2 and
        'fizzbuzz' for 3.
    """
    # Position in this table corresponds to the class index.
    outputs = (str(number), 'fizz', 'buzz', 'fizzbuzz')
    return outputs[label]

测试

def helper(number):
    """Print the FizzBuzz output for ``number`` via an encode/decode round trip."""
    label = fizzbuzz_encode(number)
    print(fizzbuzz_decode(number, label))


# Sanity check: print the FizzBuzz output for 1 through 15.
for value in range(1, 16):
    helper(value)

result

1
2
fizz
4
buzz
fizz
7
8
fizz
buzz
11
fizz
13
14
fizzbuzz

将数字转换成二进制形式可以获得更多输入特征:数字本身只是一个int(1个特征),而转换成长度为10的二进制表示后再输入模型,输入特征的数量就从1增加到10

# Number of binary digits (input features) used to represent each number.
NUM_DIGITS = 10


def binary_encode(number):
    """Return the lowest ``NUM_DIGITS`` bits of ``number`` as a NumPy array.

    The most significant bit comes first, so with ``NUM_DIGITS = 10`` the
    number 5 becomes ``[0, 0, 0, 0, 0, 0, 0, 1, 0, 1]``.
    """
    bits = []
    # Walk from the highest bit position down to bit 0.
    for shift in reversed(range(NUM_DIGITS)):
        bits.append((number >> shift) & 1)
    return np.array(bits)
# x_train: binary encodings of the numbers 101..1023 (range() excludes 1024).
# The rows are stacked into a single ndarray first, because building a tensor
# directly from a list of ndarrays is slow and triggers a PyTorch warning.
x_train = torch.tensor(
    np.array([binary_encode(number) for number in range(101, 1024)]),
    dtype=torch.float32,
)
# y_train: the class labels (0, 1, 2 or 3) for the same numbers 101..1023.
y_train = torch.tensor(
    [fizzbuzz_encode(number) for number in range(101, 1024)],
    dtype=torch.long,
)

3.通过x_train,y_train建立dataset

class MyDataset(Data.Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        x_train: Indexable collection of input features.
        y_train: Indexable collection of labels, aligned with ``x_train``.
    """

    def __init__(self, x_train, y_train):
        # Store the data on the instance so the other methods can reach it.
        self.x_train = x_train
        self.y_train = y_train

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        # Read from the instance, not from module-level globals, so the
        # dataset always serves the data it was constructed with.
        return self.x_train[idx], self.y_train[idx]

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.x_train)

构造dataset,dataloader

# Wrap the training tensors in a dataset, then serve them in shuffled
# mini-batches of 16 samples.
train_dataset = MyDataset(x_train=x_train, y_train=y_train)
train_loader = Data.DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,
)

二、模型构建

class MyModel(nn.Module):
    """Three-layer fully connected classifier with ReLU activations.

    Args:
        dim1: Width of the first hidden layer.
        dim2: Width of the second hidden layer.
        in_dim: Number of input features per sample. Defaults to 10.
        num_classes: Number of output classes (logits). Defaults to 4.
    """

    def __init__(self, dim1, dim2, in_dim=10, num_classes=4):
        super().__init__()
        # The attribute name `linear` is part of the parameter names
        # (e.g. "linear.0.weight"), so it is kept stable.
        self.linear = nn.Sequential(
            nn.Linear(in_dim, dim1),
            nn.ReLU(),
            nn.Linear(dim1, dim2),
            nn.ReLU(),
            nn.Linear(dim2, num_classes),
        )

    def forward(self, x):
        """Return raw class scores (logits) for a batch of inputs.

        Args:
            x: Tensor whose last dimension has size ``in_dim``.

        Returns:
            Tensor whose last dimension has size ``num_classes``. No softmax
            is applied here.
        """
        return self.linear(x)
# Build the network with hidden sizes 64 and 8 and move it to the chosen device.
model = MyModel(64, 8).to(device)
# Cross-entropy loss takes the raw logits and the integer class labels.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-2)
# Print the shape of the first layer's weight matrix. Parameters that live
# inside the nn.Sequential stored as `linear` are named
# "linear.<index>.<param>", so the first layer's weight is "linear.0.weight".
for k, v in model.named_parameters():
    if k == 'linear.0.weight':
        print(v.shape)

三、训练

# Total number of passes over the training data.
Epoch = 100
for epoch in range(Epoch):
    for features, labels in train_loader:
        # Move the mini-batch to the same device as the model.
        features = features.to(device)
        labels = labels.to(device)

        logits = model(features)
        loss = loss_fn(logits, labels)

        # The three training steps: compute gradients, update the weights,
        # then clear the gradients before the next batch.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    # Every 10th epoch, report the loss of the last mini-batch of that epoch.
    if epoch % 10 == 0:
        print(f'epoch:{epoch}  ', loss)

result

epoch:0   tensor(1.1197, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:10   tensor(1.2373, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:20   tensor(0.5622, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:30   tensor(0.2674, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:40   tensor(0.0579, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:50   tensor(0.0401, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:60   tensor(0.0349, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:70   tensor(0.0101, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:80   tensor(0.0516, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:90   tensor(0.0147, device='cuda:0', grad_fn=<NllLossBackward0>)

四、预测

# Build one test case: the binary encoding of 12 as a batch of a single row.
# The row is wrapped in an ndarray first, because building a tensor from a
# list of ndarrays is slow and triggers a PyTorch warning.
x_test = torch.tensor(np.array([binary_encode(12)]), dtype=torch.float32).to(device)
# Run the model to get the raw scores (logits) for the test case.
pred = model(x_test) # e.g. tensor([[-13.6835,   1.4897, -16.4106, -22.5247]], device='cuda:0',grad_fn=)
# Turn the logits into probabilities. `dim` is passed explicitly because
# relying on the implicit dimension is deprecated; dim=1 is the class axis
# of the [batch, classes] output.
softmax = nn.Softmax(dim=1)
pred = softmax(pred) # e.g. tensor([[2.5724e-07, 1.0000e+00, 1.6826e-08, 3.7212e-11]], device='cuda:0',grad_fn=)
# The index of the largest probability is the predicted class label.
result = np.argmax(pred.cpu().detach().numpy())
result # 1

注意:

  1. 需要在类的多个方法之间共享的数据,要保存到 self. 成员变量中,并在其他方法里通过 self. 来访问
  2. 训练三部曲
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
  3. 整个代码主要部分是数据处理的部分,其余代码相对较少

你可能感兴趣的:(人工智能,深度学习,pytorch)