import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence
from torch.utils import tensorboard
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm
from utils import load_sentence_polarity
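# Assumed data layout (inferred from how the loader is used below; the real
# utils.load_sentence_polarity is not shown in this listing): it returns
# train/test lists whose items are (token_id_list, label) pairs, plus a
# vocab object whose len() gives the vocabulary size.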
summary = tensorboard.SummaryWriter('./runs')
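# The logged curves can be inspected with the TensorBoard CLI, e.g.:
#   tensorboard --logdir ./runs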
class LstmDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, i):
        return self.data[i]
def collate_fn(examples):
    # Keep the true length of every sequence before padding; the LSTM uses it
    # to pack the batch and skip the padded positions.
    lengths = torch.tensor([len(ex[0]) for ex in examples])
    inputs = [torch.tensor(ex[0]) for ex in examples]
    targets = torch.tensor([ex[1] for ex in examples], dtype=torch.long)
    # Pad all sequences in the batch to the length of the longest one
    inputs = pad_sequence(inputs, batch_first=True)
    return inputs, lengths, targets
class LSTM(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_class):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        # 3-layer bidirectional LSTM
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True,
                            bidirectional=True, num_layers=3)
        # The classifier consumes the final hidden state of every layer and
        # direction: 3 layers * 2 directions = 6 vectors of size hidden_dim
        self.output = nn.Linear(hidden_dim * 6, num_class)

    def forward(self, inputs, lengths):
        embeddings = self.embeddings(inputs)
        # Pack the padded batch so the LSTM ignores padding positions
        x_pack = pack_padded_sequence(embeddings, lengths, batch_first=True,
                                      enforce_sorted=False)
        hidden, (hn, cn) = self.lstm(x_pack)
        # hn: (num_layers * num_directions, batch, hidden_dim)
        # -> (batch, 6 * hidden_dim); avoids hard-coding 6 * 256
        outputs = self.output(hn.permute(1, 0, 2).reshape(-1, 6 * self.hidden_dim))
        log_probs = F.log_softmax(outputs, dim=-1)
        return log_probs
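# Shape sketch for the reshape above (with the hyperparameters used below,
# hidden_dim = 256, batch size B):
#   hn                      : (6, B, 256)   # 3 layers * 2 directions
#   hn.permute(1, 0, 2)     : (B, 6, 256)
#   .reshape(-1, 6 * 256)   : (B, 1536)     # input to Linear(1536, num_class)
# Concatenating the final states of all layers is this listing's design
# choice; a common alternative uses only the last layer's two directions.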
embedding_dim = 128
hidden_dim = 256
num_class = 2
batch_size = 32
num_epoch = 20
train_data, test_data, vocab = load_sentence_polarity()
train_dataset = LstmDataset(train_data)
test_dataset = LstmDataset(test_data)
train_data_loader = DataLoader(train_dataset, batch_size=batch_size,
                               collate_fn=collate_fn, shuffle=True)
test_data_loader = DataLoader(test_dataset, batch_size=1,
                              collate_fn=collate_fn, shuffle=False)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTM(len(vocab), embedding_dim, hidden_dim, num_class)
model.to(device)
nll_loss = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, 15, gamma=0.99)
model.train()
for epoch in range(num_epoch):
    total_loss = 0
    for i, batch in tqdm(enumerate(train_data_loader), desc=f"Training Epoch {epoch}"):
        inputs, lengths, targets = [x.to(device) for x in batch]
        # pack_padded_sequence requires the lengths tensor to be on the CPU
        lengths = lengths.cpu()
        log_probs = model(inputs, lengths)
        loss = nll_loss(log_probs, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        # Log the per-batch loss at a global step index
        summary.add_scalar('train_batch_loss', loss.item(),
                           epoch * len(train_data_loader) + i)
    # Step the scheduler once per epoch, then log epoch-level metrics
    scheduler.step()
    print(f"Loss: {total_loss:.2f}, lr: {scheduler.get_last_lr()[0]}")
    summary.add_scalar('train_epoch_loss', total_loss, epoch)
    summary.add_scalar('train_epoch_lr', scheduler.get_last_lr()[0], epoch)
# Switch to evaluation mode before testing
model.eval()
acc = 0
for batch in tqdm(test_data_loader, desc="Testing"):
    inputs, lengths, targets = [x.to(device) for x in batch]
    lengths = lengths.cpu()
    with torch.no_grad():
        output = model(inputs, lengths)
        acc += (output.argmax(dim=1) == targets).sum().item()

# test_data_loader uses batch_size=1, so its length equals the number of
# test examples and acc / len(...) is the accuracy
print(f"Acc: {acc / len(test_data_loader):.2f}")