import torch
import math
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
# Model definition
class Transformer(nn.Module):
    """Encoder-only Transformer producing one logit vector per input position.

    Pipeline: ``nn.Embedding`` -> ``PositionalEncoding`` ->
    ``nn.TransformerEncoder`` -> ``nn.Linear``.

    Args:
        input_size: Number of rows in the embedding table (vocabulary size).
        output_size: Width of the final linear projection.
        hidden_size: Embedding width, also used as the encoder ``d_model``.
        num_layers: Number of stacked encoder layers.
        num_heads: Number of attention heads per encoder layer.
        dropout: Dropout probability used by the positional encoding and by
            every encoder layer.
        batch_first: When true (default), ``forward`` takes ``(batch, seq)``
            input and returns ``(batch, seq, output_size)``. When false, the
            input is ``(seq, batch)`` and the output ``(seq, batch,
            output_size)``.
    """

    def __init__(self, input_size, output_size, hidden_size=512, num_layers=6, num_heads=8,
                 *, dropout=0.1, batch_first=True):
        super().__init__()
        self.batch_first = batch_first
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.pos_encoder = PositionalEncoding(hidden_size, dropout)
        encoder_layer = nn.TransformerEncoderLayer(hidden_size, num_heads, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, src):
        """Return per-position logits for a 2-D tensor of token ids.

        Args:
            src: Integer tensor of token ids, laid out according to
                ``batch_first``.

        Returns:
            Float tensor with the same two leading dimensions as ``src`` and a
            trailing dimension of ``output_size``.
        """
        # PositionalEncoding slices its table by dim 0, and the encoder layer is
        # built with its default sequence-first layout, so both need the
        # sequence axis in front. Without this swap a (batch, seq) input would be
        # encoded by batch index and attended across unrelated samples.
        if self.batch_first:
            src = src.transpose(0, 1)
        hidden = self.embedding(src)
        hidden = self.pos_encoder(hidden)
        hidden = self.transformer_encoder(hidden)
        if self.batch_first:
            # Swap back before the projection so the caller gets the layout it
            # passed in.
            hidden = hidden.transpose(0, 1)
        return self.decoder(hidden)
# Dataset definition
class MyDataset(Dataset):
    """Map-style dataset that pairs each sample with the label at the same index.

    Args:
        data: Indexable, sized collection of samples.
        labels: Indexable, sized collection of labels, one per sample.

    Raises:
        ValueError: If ``data`` and ``labels`` differ in length.
    """

    def __init__(self, data, labels):
        # Fail at construction time: a length mismatch would otherwise show up
        # as an IndexError in the middle of an epoch, or as unused labels.
        if len(data) != len(labels):
            raise ValueError(
                f'data and labels must have the same length, '
                f'got {len(data)} and {len(labels)}'
            )
        self.data = data
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair stored at ``index``."""
        return self.data[index], self.labels[index]
# Positional encoding layer
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a sequence-first tensor.

    The table is stored in the non-trainable buffer ``pe`` with shape
    ``(max_len, 1, d_model)``: even feature columns hold sines, odd feature
    columns hold cosines.

    Args:
        d_model: Size of the feature dimension; may be odd or even.
        dropout: Dropout probability applied after the encoding is added.
        max_len: Number of positions precomputed in the table.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even column, so
        # the cosine term is trimmed to fit; for even d_model this is a no-op.
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])
        # (max_len, d_model) -> (max_len, 1, d_model) so it broadcasts over dim 1.
        pe = pe.unsqueeze(1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Add the encoding for the first ``x.size(0)`` positions, then apply dropout.

        Args:
            x: Tensor whose dim 0 is the position axis and whose last dimension
                is ``d_model``; the encoding is broadcast over dim 1.

        Returns:
            Tensor of the same shape as ``x``.
        """
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)
# Training loop
def train(model, train_loader, criterion, optimizer, scheduler=None, num_epochs=10):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Each batch is moved to the device of the model's parameters, then run
    through the usual zero-grad / forward / backward / optimizer-step cycle.
    After every epoch the scheduler (if any) is stepped once and the loss of
    the last batch is printed.

    Args:
        model: Module to optimise; switched to training mode.
        train_loader: Iterable yielding ``(data, label)`` tensor pairs.
        criterion: Callable ``criterion(output, label)`` returning a scalar loss.
        optimizer: Optimizer that updates ``model``'s parameters.
        scheduler: Optional learning-rate scheduler, stepped once per epoch.
        num_epochs: Number of passes over ``train_loader``.

    Raises:
        ValueError: If ``train_loader`` yields no batches during an epoch.
    """
    # Take the device from the model rather than from a module-level global, so
    # the function also works when called from outside this script.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')
    model.train()
    for epoch in range(num_epochs):
        loss = None
        for data, label in train_loader:
            data = data.to(target_device)
            label = label.to(target_device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
        if loss is None:
            # Without this guard the print below would fail on an unbound name.
            raise ValueError('train_loader yielded no batches')
        if scheduler is not None:
            scheduler.step()
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')
# Example data: four token sequences of length 5, each with a one-hot label
# over four classes.
data = [[1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [2, 3, 1, 5, 4], [4, 2, 1, 3, 5]]
labels = [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]
# Convert to tensors
data = torch.LongTensor(data)
labels = torch.LongTensor(labels)
# Hyperparameters
input_size = 6  # token ids run from 1 to 5, so the embedding needs 6 rows
output_size = 4
hidden_size = 512
num_layers = 6
num_heads = 8
batch_size = 2
num_epochs = 100
learning_rate = 0.01
device = 'cpu'
# Build the model and the dataset
model = Transformer(input_size, output_size, hidden_size, num_layers, num_heads).to(device)
dataset = MyDataset(data, labels)


def _sequence_cross_entropy(logits, one_hot):
    """Cross-entropy for one class label per sequence.

    The model emits one logit vector per token, while each sample carries a
    single one-hot label. Passing both straight to ``nn.CrossEntropyLoss``
    would treat dim 1 of the logits (the five positions) as the class axis and
    the one-hot entries as per-position class indices.

    Args:
        logits: Float tensor ``(batch, seq_len, num_classes)``.
        one_hot: Integer tensor ``(batch, num_classes)`` of one-hot rows.

    Returns:
        Scalar loss tensor.
    """
    pooled = logits.mean(dim=1)  # average over positions -> (batch, num_classes)
    return nn.functional.cross_entropy(pooled, one_hot.argmax(dim=1))


# Data loader and loss function
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
criterion = _sequence_cross_entropy
# Optimizer and learning-rate scheduler
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.95)
# Start training
train(model, train_loader, criterion, optimizer, scheduler, num_epochs)