Attention Mechanism Notes

AttentionLayer

Reference: https://mp.weixin.qq.com/s?__biz=MzUyMjE2MTE0Mw==&mid=2247489371&idx=2&sn=4349d0cfa5ec500c78a51b644b364a74&chksm=f9d149c3cea6c0d5a7c2e84b054335dd9041dafc88756999ad5cc004f4ad7f3415d2e07a6a67&mpshare=1&scene=1&srcid=&sharer_sharetime=1589117380169&sharer_shareid=ab5aa3530015c5ae813227bf34b4fc84&key=e314468ae3d1009720461d98d87e7dfc5a47dc334c3eb0a51dc115dfd6fefa84e57bbdfed9caf8562622131756d200bd6fd9972a7bd94c3aef8982f0d821973b4ac45e3ab1671ef1e9632fd8cbb290e7&ascene=1&uin=MjIzODAyMTI0MA%3D%3D&devicetype=Windows+10+x64&version=62090070&lang=zh_CN&exportkey=AfZBLcYmYDTCsgRY3P8%2Fz3c%3D&pass_ticket=IEmN6%2FmqM09SX%2FoLnkRVOOjzamZgAF7ufafmLFUtnTHHiqJjgi%2BStQqWZ%2FpHCLcH

There is quite a lot in this one:

https://github.com/MichaelCaohn/GEC_reference_code/blob/2bbe069c693231fcc5b4d6a161e75468a1a6214e/kakao_brain/fairseq/fairseq/models/fconv.py

import torch
import torch.nn as nn

class AttentionLayer(nn.Module):
    """SE-style channel attention: squeeze with global average pooling, excite with a bottleneck MLP."""
    def __init__(self, channel, reduction=64, multiply=True):
        super(AttentionLayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)  # squeeze: B x C x H x W -> B x C x 1 x 1
        self.fc = nn.Sequential(
                nn.Linear(channel, channel // reduction),
                nn.ReLU(inplace=True),
                nn.Linear(channel // reduction, channel),
                nn.Sigmoid()  # per-channel gates in (0, 1)
                )
        self.multiply = multiply

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)      # B x C channel descriptors
        y = self.fc(y).view(b, c, 1, 1)      # B x C x 1 x 1 attention weights
        if self.multiply:
            return x * y   # rescale the input feature map channel-wise
        else:
            return y       # return just the attention weights
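A minimal usage sketch (my own toy example, not from the referenced repo; the shapes and sizes are illustrative only):

# Hypothetical example: rescale a batch of 16 feature maps with 256 channels.
feat = torch.randn(16, 256, 32, 32)
se_attn = AttentionLayer(channel=256, reduction=64, multiply=True)
out = se_attn(feat)   # same shape as feat, each channel scaled by its learned weight
print(out.shape)      # torch.Size([16, 256, 32, 32])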

Reference: https://github.com/libeibei95/gat_srgnn/blob/095e8d3a6151e5c17f72f41550f0c62428c09553/model/AttentionLayer.py

import torch
from torch import nn
import torch.nn.functional as F

class AttentionLayer(nn.Module):
    def __init__(self, in_features, out_features, alpha=0.1):
        super(AttentionLayer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features

        self.W = nn.Parameter(torch.zeros(size=(in_features, out_features)))
        nn.init.xavier_uniform_(self.W.data, gain=1.414)
        self.a = nn.Parameter(torch.zeros(size=(2*out_features, 1)))
        nn.init.xavier_uniform_(self.a.data, gain=1.414)

        self.alpha = alpha
        self.leakyrelu = nn.LeakyReLU(self.alpha)

    def forward(self, item_embeddings, card_embeddings, masks):
        '''
        :param item_embeddings: batch_size * item_cnt * hidden_size
        :param card_embeddings: batch_size * hidden_size
        :param masks: batch_size * item_cnt, 1 for valid items and 0 for padding
        :return: batch_size * hidden_size attention-weighted sum of the item embeddings
        '''
        batch_size, hidden_size = card_embeddings.size()
        item_cnt = item_embeddings.size()[1]
        # tile the card embedding so every item is paired with its card: batch_size * item_cnt * hidden_size
        card_embeddings = card_embeddings.repeat(1, item_cnt).view(batch_size, item_cnt, hidden_size)
        # project both sides into the attention space: (batch_size*item_cnt) * out_features
        items = torch.mm(item_embeddings.view(-1, hidden_size), self.W)
        cards = torch.mm(card_embeddings.view(-1, hidden_size), self.W)
        att_input = torch.cat([items, cards], dim=1)  # (batch_size*item_cnt) * (2*out_features)
        e = self.leakyrelu(torch.matmul(att_input, self.a)).squeeze(1).view(batch_size, -1)  # batch_size * item_cnt

        # mask out padded items with a large negative score so softmax assigns them ~0 weight
        zero_vec = -9e10 * torch.ones_like(masks)
        zero_vec = zero_vec.float()
        attention = torch.where(masks == 1, e, zero_vec)
        attention = F.softmax(attention, dim=1).unsqueeze(1)  # batch_size * 1 * item_cnt
        card_hidden = torch.matmul(attention, item_embeddings).squeeze(1)  # batch_size * hidden_size
        return card_hidden
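A minimal usage sketch (my own toy example, assuming in_features equals the embedding hidden size and that masks marks valid items with 1; all sizes are illustrative):

# Hypothetical example: attention-pool 5 item embeddings per card into one vector.
batch_size, item_cnt, hidden_size = 4, 5, 64
item_embeddings = torch.randn(batch_size, item_cnt, hidden_size)
card_embeddings = torch.randn(batch_size, hidden_size)
masks = torch.ones(batch_size, item_cnt)
masks[:, -1] = 0   # pretend the last slot of every card is padding

layer = AttentionLayer(in_features=hidden_size, out_features=32)
card_hidden = layer(item_embeddings, card_embeddings, masks)
print(card_hidden.shape)   # torch.Size([4, 64])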

 
