Text Matching Papers and Their PyTorch Reproduction (Part 1): DRCN

I. Model

1. Overall model diagram

[Figure 1: DRCN overall architecture]

2. Implementation details

[Figure 2: implementation details]

II. Related Formulas

1. Word representation layer: the character convolution c_p, the fixed pre-trained word embedding e_p^fix, and the trainable pre-trained word embedding e_p^tr

f_p is an exact-match flag: it is 1 if a word in one sentence also appears in the other sentence, and 0 otherwise.

[Figure 3: word representation layer formulas]
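As a minimal sketch (the helper exact_match_flags and the example tokens are my own assumptions, not part of the original code), the f_p / f_q flags fed to DRCN.forward() in the code section below can be computed like this:

def exact_match_flags(tokens_a, tokens_b):
    # 1.0 if a token also appears in the other sentence, else 0.0
    vocab_b = set(tokens_b)
    return [1.0 if tok in vocab_b else 0.0 for tok in tokens_a]

p_tokens = ["the", "cat", "sits"]
q_tokens = ["a", "cat", "runs"]
print(exact_match_flags(p_tokens, q_tokens))  # [0.0, 1.0, 0.0]
print(exact_match_flags(q_tokens, p_tokens))  # [0.0, 1.0, 0.0]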

2. Stacked RNN and co-attention

[Figure 4: stacked RNN and co-attention formulas]

The raw input x is passed through the RNN to obtain hidden representations h. The hidden representations h_i and h_j of the two sentences are then fed into the co-attention mechanism to obtain the attentive representations a_i and a_j.
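Consistent with the co_attention() implementation in the code below, which uses cosine similarity as the alignment score, the attentive representation can be written as

e_{ij} = \cos(h_i^p, h_j^q), \qquad \alpha_{ij} = \frac{\exp(e_{ij})}{\sum_k \exp(e_{ik})}, \qquad a_i^p = \sum_j \alpha_{ij}\, h_j^q

and symmetrically for a_j^q with the roles of the two sentences swapped.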

3. Auto-encoder

A fully connected layer compresses the high-dimensional representation into a lower-dimensional one.
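A rough sketch of the corresponding equations, matching the AutoEncoder module below (the weight names W_1, W_2 are illustrative):

h = W_1 x + b_1 \ \text{(compressed representation passed to the next block)}, \qquad \hat{x} = W_2 h + b_2, \qquad L_{ae} = \mathrm{MSE}(x, \hat{x})

The reconstruction loss L_{ae} is returned by the model alongside the logits so that it can be added to the training objective.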

4. Interaction and classification layers

More specifically, if the output of the final RNN layer is a 100d vector for a sentence with 30 words, a 30 × 100 matrix is obtained which is max-pooled column-wise such that the size of the resultant vector p or q is 100.

The feature vectors p and q are obtained by column-wise max-pooling, and are then fused as shown below.
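Matching the interaction() method in the code below, the fusion of the max-pooled vectors p and q is

v = [\, p;\ q;\ p + q;\ p \odot q;\ p - q;\ \lVert p - q \rVert_2 \,]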

This fused vector v is the final representation of the sentence pair.

Finally, linear transformations and activation functions map v to the classification output.

 

III. Code Implementation

1. Model: model.py

import numpy as np
import torch
from torch import nn, tensor

# Overall DRCN model: stacked BiLSTM + co-attention blocks with dense connections and auto-encoder bottlenecks
class DRCN(nn.Module):

    def __init__(self, word2vec=None):
        super(DRCN, self).__init__()
        num_embeddings = 14206
        num_classes = 2
        embedding_dim = 300
        lstm_dim = 100
        ae_dim = 200
        fc_dim = 1000
        embed_dp = 0.5
        ae_dp = 0.2
        lc_dp = 0.2

        self.embed = WordEmbedding(num_embeddings, embedding_dim, word2vec, dropout=embed_dp)
        # per word: trainable embedding + fixed embedding + exact-match flag
        input_size = embedding_dim * 2 + 1
        # dense connection: each block appends its BiLSTM output (2 * lstm_dim)
        # and its co-attention output (2 * lstm_dim) to its own input
        self.rnn1 = RNNCoAttention(input_size, lstm_dim)
        input_size += 4 * lstm_dim
        self.rnn2 = RNNCoAttention(input_size, lstm_dim)
        input_size += 4 * lstm_dim
        self.rnn3 = RNNCoAttention(input_size, lstm_dim)
        input_size += 4 * lstm_dim
        # auto-encoder bottleneck keeps the feature size from growing unboundedly
        self.ae1 = AutoEncoder(input_size, ae_dim, dropout=ae_dp)
        input_size = ae_dim
        self.rnn4 = RNNCoAttention(input_size, lstm_dim)
        input_size += 4 * lstm_dim
        self.rnn5 = RNNCoAttention(input_size, lstm_dim)
        input_size += 4 * lstm_dim
        self.ae2 = AutoEncoder(input_size, ae_dim, dropout=ae_dp)
        # interaction vector [p, q, p + q, p * q, p - q, ||p - q||] has size 5 * ae_dim + 1
        self.fc = InteractPredict(5 * ae_dim + 1, fc_dim, num_classes, lc_dp)

    def forward(self, p, q, fp, fq):
        loss = 0  # accumulated auto-encoder reconstruction loss
        p, q = self.embed(p, q, fp, fq)
        p, q = self.rnn1(p, q)
        p, q = self.rnn2(p, q)
        p, q = self.rnn3(p, q)
        p, q, ae_loss = self.ae1(p, q)
        loss += ae_loss
        p, q = self.rnn4(p, q)
        p, q = self.rnn5(p, q)
        p, q, ae_loss = self.ae2(p, q)
        loss += ae_loss
        y = self.fc(p, q)
        return y, loss


# Word embedding layer: fixed embedding + trainable embedding + exact-match flag
class WordEmbedding(nn.Module):
    # word2vec / GloVe, 300-dim
    def __init__(self, num_embeddings, embedding_dim, word2vec=None, dropout=0.5):
        super(WordEmbedding, self).__init__()
        # fixed (non-trainable) pre-trained embedding
        self.static_embed = nn.Embedding(num_embeddings, embedding_dim, padding_idx=0)
        if word2vec is not None:
            self.static_embed.weight.data.copy_(torch.from_numpy(word2vec))
            self.static_embed.weight.requires_grad = False
        else:
            self.static_embed.weight.requires_grad = True

        # trainable embedding, initialized from the same pre-trained vectors
        self.train_embed = nn.Embedding(num_embeddings, embedding_dim, padding_idx=0)
        if word2vec is not None:
            self.train_embed.weight.data.copy_(torch.from_numpy(word2vec))
        self.train_embed.weight.requires_grad = True

        self.dropout = nn.Dropout(dropout)

    def forward(self, xp, xq, fp, fq):
        xp = self.singleForword(xp, fp)
        xq = self.singleForword(xq, fq)
        return xp, xq

    def singleForword(self, x, f):
        # concatenate trainable embedding, fixed embedding and the match flag along the feature dim
        x = torch.cat([self.train_embed(x), self.static_embed(x), f.unsqueeze(2)], dim=2)
        x = self.dropout(x)
        return x


# BiLSTM + co-attention layer
class RNNCoAttention(nn.Module):
    # output size = input_size + 4 * hidden_size (BiLSTM output + co-attention output)
    def __init__(self, input_size, hidden_size):
        super(RNNCoAttention, self).__init__()
        # batch_first=True: inputs are (batch, seq_len, feature)
        self.lstm = nn.LSTM(input_size, hidden_size, bidirectional=True, batch_first=True)

    def forward(self, xp, xq):
        hp, _ = self.lstm(xp)
        hq, _ = self.lstm(xq)
        ap, aq = self.co_attention(hp, hq)
        # dense connection: concatenate BiLSTM output, attention output and the original input
        xp = torch.cat([hp, ap, xp], dim=2)
        xq = torch.cat([hq, aq, xq], dim=2)
        return xp, xq

    def co_attention(self, hp, hq):
        batch_size, seq_len, embedding_dim = hp.size()

        # pairwise cosine similarity: p2q[b, i, j] = cos(hp[b, i], hq[b, j])
        mul_p = hp.unsqueeze(2).expand(batch_size, seq_len, seq_len, embedding_dim)
        mul_q = hq.unsqueeze(1).expand(batch_size, seq_len, seq_len, embedding_dim)
        p2q = torch.cosine_similarity(mul_p, mul_q, dim=3)
        q2p = p2q.transpose(1, 2)

        # each position attends over the other sentence with softmax-normalized similarities
        ap = torch.matmul(torch.softmax(p2q, dim=2), hq)
        aq = torch.matmul(torch.softmax(q2p, dim=2), hp)
        return ap, aq


# Auto-encoder layer: compress features and add a reconstruction loss
class AutoEncoder(nn.Module):

    def __init__(self, input_size, hidden_size, dropout=0.2):
        super(AutoEncoder, self).__init__()
        # encoder: compress to the bottleneck dimension
        self.fc1 = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.Dropout(dropout)
        )
        # decoder: reconstruct the input for the auxiliary loss
        self.fc2 = nn.Linear(hidden_size, input_size)
        self.loss_func = nn.MSELoss()

    def forward(self, xp, xq):
        hp, lp = self.singleForword(xp)
        hq, lq = self.singleForword(xq)
        loss = lp + lq
        return hp, hq, loss

    def singleForword(self, x):
        h = self.fc1(x)           # compressed representation passed to the next block
        y = self.fc2(h)           # reconstruction
        l = self.loss_func(y, x)  # reconstruction loss
        return h, l


# Interaction and prediction layer
class InteractPredict(nn.Module):

    def __init__(self, input_size, hidden_size, num_classes, dropout=0.2):
        super(InteractPredict, self).__init__()

        self.dropout = nn.Dropout(dropout)

        self.fc1 = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.BatchNorm1d(hidden_size),
            nn.LeakyReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.BatchNorm1d(hidden_size),
            nn.LeakyReLU(),
        )

        self.fc2 = nn.Sequential(
            nn.Linear(hidden_size, num_classes),
        )

    def forward(self, p, q):
        v = self.interaction(p, q)
        v = self.dropout(v)
        v = self.fc1(v)
        v = self.fc2(v)
        return v

    def interaction(self, p, q):
        # column-wise max-pooling over the time dimension: (batch, seq_len, dim) -> (batch, dim)
        p, _ = p.max(dim=1)
        q, _ = q.max(dim=1)
        asymmetric = p - q  # asymmetric (order-sensitive) feature
        asy_mod = ((asymmetric ** 2).sum(dim=1) ** 0.5).unsqueeze(1)  # L2 norm of the difference
        v = torch.cat([p, q, p + q, p * q, asymmetric, asy_mod], dim=1)
        return v


# Quick smoke test with random inputs
if __name__ == '__main__':
    batch_size = 32
    seq_len = 20
    embedding_dim = 300
    num_embeddings = 14205 + 1
    lstm_dim = 100
    ae_dim = 200
    fc_dim = 1000
    num_classes = 2

    p = tensor(np.random.randint(0, num_embeddings, (batch_size, seq_len)), dtype=torch.long)
    q = tensor(np.random.randint(0, num_embeddings, (batch_size, seq_len)), dtype=torch.long)
    # random floats stand in for the 0/1 exact-match flags
    pf = tensor(np.random.random((batch_size, seq_len)), dtype=torch.float)
    qf = tensor(np.random.random((batch_size, seq_len)), dtype=torch.float)
    model = DRCN()
    y, l = model(p, q, pf, qf)
    print(y.size())
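
    # --- Hypothetical training step (not part of the original post) ---
    # forward() returns both the logits and the accumulated auto-encoder
    # reconstruction loss; the 0.2 weight below is an assumed value
    labels = tensor(np.random.randint(0, num_classes, (batch_size,)), dtype=torch.long)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        filter(lambda param: param.requires_grad, model.parameters()), lr=1e-3)
    loss = criterion(y, labels) + 0.2 * l
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(loss.item())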

 
