Siamese Network简单复现

Siamese Network论文地址

最近在看 NLP 相关的孪生(Siamese)模型,读到这篇最原始的论文后,想用 PyTorch 简单实现一下,代码如下。

模型:

import torch
import torch.nn as nn

class SIAMESE(torch.nn.Module):
    """Siamese BiLSTM classifier over a pair of token-id sequences.

    Both sequences share one embedding table and one bidirectional LSTM.
    The final cell states of the two passes (both directions each) are
    concatenated and mapped by a linear layer to class probabilities.

    Args:
        word_size: Vocabulary size (number of embedding rows).
        embedding_size: Dimension of each word embedding.
        lstm_hidden_size: LSTM hidden size per direction.
        num_classes: Number of output classes.
        padding_idx: Token id reserved for padding. Its embedding is all
            zeros and receives no gradient.
    """

    def __init__(
        self,
        word_size: int = 100,
        embedding_size: int = 64,
        lstm_hidden_size: int = 128,
        num_classes: int = 2,
        padding_idx: int = 0,
    ):
        super().__init__()

        # Kept only so existing readers of these attributes keep working;
        # forward() does not depend on a fixed batch size or sequence length.
        self.batch_size = 6
        self.seq_len = 8
        self.embedding_size = embedding_size
        self.lstm_hidden_size = lstm_hidden_size
        self.word_size = word_size

        # The padding id must be a vocabulary index, not the sequence length:
        # using seq_len here would zero out and freeze a real token's vector.
        self.word_embedding = nn.Embedding(
            self.word_size, self.embedding_size, padding_idx=padding_idx
        )

        # NOTE: attribute name (including the "leyer" typo) is kept so that
        # existing state_dict keys still load.
        self.encoder_leyer = nn.LSTM(
            input_size=self.embedding_size,
            hidden_size=self.lstm_hidden_size,
            bidirectional=True,
            batch_first=True,
        )
        # 2 sequences x 2 directions x hidden size features per sample.
        self.aggregate = nn.Linear(4 * self.lstm_hidden_size, num_classes)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, seq1: torch.Tensor, seq2: torch.Tensor) -> torch.Tensor:
        """Return class probabilities for each sequence pair.

        Args:
            seq1: Integer token ids, shape (batch, seq_len).
            seq2: Integer token ids, shape (batch, seq_len).

        Returns:
            Tensor of shape (batch, num_classes); each row sums to 1.
            Because softmax is already applied, train with
            ``nn.NLLLoss`` on ``torch.log(output)`` rather than
            ``nn.CrossEntropyLoss``, which expects raw logits.
        """
        sq1_embedding = self.word_embedding(seq1)  # (batch, seq_len, embedding)
        sq2_embedding = self.word_embedding(seq2)

        # nn.LSTM returns (output, (h_n, c_n)).
        # NOTE(review): the final *cell* state c_n is used as the sentence
        # encoding, as in the original code; h_n is the more conventional
        # choice -- confirm this is intentional.
        _, (_, cell1) = self.encoder_leyer(sq1_embedding)  # (2, batch, hidden)
        _, (_, cell2) = self.encoder_leyer(sq2_embedding)

        # Put the batch dimension first: (2, batch, hidden) -> (batch, 2, hidden).
        encoder1_trans = torch.transpose(cell1, 0, 1)
        encoder2_trans = torch.transpose(cell2, 0, 1)

        encoder = torch.cat([encoder1_trans, encoder2_trans], dim=1)  # (batch, 4, hidden)
        # Flatten per sample using the actual batch size, so any batch size
        # (including 1) works.
        encoder_agg = encoder.flatten(start_dim=1)  # (batch, 4 * hidden)
        agg = self.aggregate(encoder_agg)
        return self.softmax(agg)

模拟一个简单的训练:

# Toy data: 6 sequence pairs of 8 token ids each, drawn from [1, 100),
# with one binary label per pair.
seq1 = torch.randint(1, 100, (6, 8))
seq2 = torch.randint(1, 100, (6, 8))
label = torch.LongTensor([1, 1, 0, 0, 1, 1])

siamese = SIAMESE()
# SIAMESE.forward already applies softmax, so the loss is NLLLoss on
# log-probabilities. CrossEntropyLoss expects raw logits and would apply a
# second softmax, flattening the gradients.
loss_func = nn.NLLLoss()
optimizer = torch.optim.SGD(siamese.parameters(), lr=0.1)

for i in range(10):
    # Clear gradients from the previous step; otherwise they accumulate.
    optimizer.zero_grad()
    out_put = siamese(seq1, seq2)
    # Clamp before log so a probability that underflows to 0 cannot yield -inf.
    loss = loss_func(torch.log(out_put.clamp_min(1e-12)), label)
    print("loss --> {}".format(loss))
    loss.backward()
    optimizer.step()

你可能感兴趣的:(深度学习,python,nlp)