Notes taken while following https://www.bilibili.com/video/BV16S4y1P7PN
# Import the required packages
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as Data

# Train on the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Define the encoding function: given a number, return 3 if it is divisible by 15, 2 if divisible by 5, 1 if divisible by 3, and 0 otherwise (class 0 stands for the number itself).
def fizzbuzz_encode(number):
    """
    number: int
    """
    if number % 15 == 0:
        return 3  # 'fizzbuzz'
    elif number % 5 == 0:
        return 2  # 'buzz'
    elif number % 3 == 0:
        return 1  # 'fizz'
    return 0  # str(number)
Map a class index (0, 1, 2, or 3) back to the corresponding label string:
def fizzbuzz_decode(number, label):
    """
    number: int
    label: 0 1 2 3
    """
    return [str(number), 'fizz', 'buzz', 'fizzbuzz'][label]
Test:
def helper(number):
    print(fizzbuzz_decode(number, fizzbuzz_encode(number)))

for i in range(1, 16):
    helper(i)
Result:
1
2
fizz
4
buzz
fizz
7
8
fizz
buzz
11
fizz
13
14
fizzbuzz
Converting the number to binary gives the model more input features: the raw number is a single int (one feature), but encoded as a length-10 binary vector it provides 10 input features.
# Convert a number to a NUM_DIGITS-bit binary vector (most significant bit first)
NUM_DIGITS = 10
def binary_encode(number):
    return np.array([number >> d & 1 for d in range(NUM_DIGITS)][::-1])
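A quick sanity check (my own addition, not from the video): 6 is 0b110, so its most-significant-bit-first encoding ends in 110.

print(binary_encode(6))  # [0 0 0 0 0 0 0 1 1 0]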
# x_train: binary encodings of the numbers 101-1023 (1-100 is held out so it can serve as unseen test data)
x_train = torch.Tensor([binary_encode(number) for number in range(101, 1024)])
# y_train: the corresponding class indices 0-3 for 101-1023
y_train = torch.LongTensor([fizzbuzz_encode(number) for number in range(101, 1024)])
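A quick shape check (my own addition): 1023 - 101 + 1 = 923 samples, each a 10-dimensional vector.

print(x_train.shape, y_train.shape)  # torch.Size([923, 10]) torch.Size([923])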
class MyDataset(Data.Dataset):
    def __init__(self, x_train, y_train):
        self.x_train = x_train  # store as member variables so other methods can access them via self
        self.y_train = y_train
    def __getitem__(self, idx):
        # index the member variables, not the globals of the same name
        return self.x_train[idx], self.y_train[idx]
    def __len__(self):
        return len(self.x_train)
Build the dataset and the dataloader:
train_dataset = MyDataset(x_train, y_train)
train_loader = Data.DataLoader(train_dataset, batch_size=16, shuffle=True)
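As a sanity check (my own sketch, not from the video), drawing one batch from the loader shows the shapes the model will receive:

xb, yb = next(iter(train_loader))
print(xb.shape, yb.shape)  # torch.Size([16, 10]) torch.Size([16])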
class MyModel(nn.Module):
    def __init__(self, dim1, dim2):
        super(MyModel, self).__init__()
        self.linear = nn.Sequential(
            nn.Linear(10, dim1),
            nn.ReLU(),
            nn.Linear(dim1, dim2),
            nn.ReLU(),
            nn.Linear(dim2, 4),
        )
        # Equivalent layer-by-layer definition:
        # self.linear1 = nn.Linear(10, dim1)
        # self.activation1 = nn.ReLU()
        # self.linear2 = nn.Linear(dim1, dim2)
        # self.activation2 = nn.ReLU()
        # self.linear3 = nn.Linear(dim2, 4)

    def forward(self, x):
        # output = self.linear1(x)           # => [batch_size, 64]
        # output = self.activation1(output)  # => [batch_size, 64]
        # output = self.linear2(output)      # => [batch_size, 8]
        # output = self.activation2(output)  # => [batch_size, 8]
        # output = self.linear3(output)      # => [batch_size, 4]
        output = self.linear(x)
        return output
model = MyModel(64, 8).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-2)
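Note that nn.CrossEntropyLoss expects raw logits and applies log-softmax internally, which is why the model has no softmax layer and softmax only appears explicitly at inference time below. A minimal illustration of the equivalence (my own sketch, using the F alias imported above):

logits = torch.tensor([[2.0, 1.0, 0.0, -1.0]])
target = torch.tensor([0])
loss_a = nn.CrossEntropyLoss()(logits, target)             # cross entropy on raw logits
loss_b = F.nll_loss(F.log_softmax(logits, dim=1), target)  # log-softmax followed by NLL
print(loss_a.item(), loss_b.item())                        # the two values are identical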
# With nn.Sequential, parameters are named by submodule index,
# e.g. 'linear.0.weight' rather than 'linear1.weight'
for k, v in model.named_parameters():
    if k == 'linear.0.weight':
        print(v.shape)  # torch.Size([64, 10])
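For reference, all parameter names and shapes for MyModel(64, 8) (listing reconstructed by me, not copied from the video):

for k, v in model.named_parameters():
    print(k, tuple(v.shape))
# linear.0.weight (64, 10)
# linear.0.bias (64,)
# linear.2.weight (8, 64)
# linear.2.bias (8,)
# linear.4.weight (4, 8)
# linear.4.bias (4,)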
Epoch = 100
for epoch in range(Epoch):
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        pred = model(x)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()  # clear gradients so they do not accumulate across batches
    if epoch % 10 == 0:
        print(f'epoch:{epoch} ', loss)
Result:
epoch:0 tensor(1.1197, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:10 tensor(1.2373, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:20 tensor(0.5622, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:30 tensor(0.2674, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:40 tensor(0.0579, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:50 tensor(0.0401, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:60 tensor(0.0349, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:70 tensor(0.0101, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:80 tensor(0.0516, device='cuda:0', grad_fn=<NllLossBackward0>)
epoch:90 tensor(0.0147, device='cuda:0', grad_fn=<NllLossBackward0>)
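To quantify the fit, a training-set accuracy check (my own sketch, not part of the video):

with torch.no_grad():
    logits = model(x_train.to(device))
    acc = (logits.argmax(dim=1) == y_train.to(device)).float().mean().item()
print(f'train accuracy: {acc:.4f}')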
# Build a single test example: the binary encoding of 12
x_test = torch.Tensor([binary_encode(12)]).to(device)
# Run the model to get the raw logits
pred = model(x_test)  # tensor([[-13.6835, 1.4897, -16.4106, -22.5247]], device='cuda:0', grad_fn=...)
# Convert the logits to probabilities (dim=1: softmax across the 4 classes)
softmax = nn.Softmax(dim=1)
pred = softmax(pred)  # tensor([[2.5724e-07, 1.0000e+00, 1.6826e-08, 3.7212e-11]], device='cuda:0', grad_fn=...)
# Take the index of the largest probability
result = np.argmax(pred.cpu().detach().numpy())
result  # 1, i.e. 'fizz' -- correct, since 12 is divisible by 3
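The natural end-to-end check is to predict the whole held-out range 1-100 and decode the outputs (my own sketch, built from the functions defined above):

with torch.no_grad():
    x_all = torch.Tensor([binary_encode(n) for n in range(1, 101)]).to(device)
    labels = model(x_all).argmax(dim=1).cpu().numpy()
print([fizzbuzz_decode(n, label) for n, label in zip(range(1, 101), labels)])
# e.g. ['1', '2', 'fizz', '4', 'buzz', ...] -- most entries should match true FizzBuzz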