DL之self-attention:self-attention自注意力机制模块思路的8个步骤及其代码实现

DL之self-attention:self-attention自注意力机制模块思路的8个步骤及其代码实现

 

 

目录

代码实现


 

 

相关文章
DL之Attention:Attention的简介、应用领域之详细攻略
DL之self-attention:self-attention的简介、应用之详细攻略

 

代码实现

说明:标题中提到8个步骤,而下面的代码只显式编号了步骤1~7;由于全部采用矩阵运算,三个输入(Input 1、2、3)对应的输出是一次性同时计算出来的,无需再对其余输入逐个重复计算。

# Worked example of self-attention on three 4-dimensional inputs.
# Each step below produces the tensor named in its heading; the expected
# values are listed in the comments next to the computation.
import torch
from torch.nn.functional import softmax

# Step 1: prepare the inputs (Input 1, 2, 3), one input per row.
x = torch.tensor(
    [
        [1, 0, 1, 0],
        [0, 2, 0, 2],
        [1, 1, 1, 1],
    ],
    dtype=torch.float32,
)

# Step 2: initialise the projection weights (4 input features -> 3 features).
w_key = torch.tensor(
    [[0, 0, 1], [1, 1, 0], [0, 1, 0], [1, 1, 0]],
    dtype=torch.float32,
)
w_query = torch.tensor(
    [[1, 0, 1], [1, 0, 0], [0, 0, 1], [0, 1, 1]],
    dtype=torch.float32,
)
w_value = torch.tensor(
    [[0, 2, 0], [0, 3, 0], [1, 0, 3], [1, 1, 0]],
    dtype=torch.float32,
)

# Step 3: derive the keys, queries and values by projecting the inputs.
keys = torch.matmul(x, w_key)
querys = torch.matmul(x, w_query)
values = torch.matmul(x, w_value)

# Expected: tensor([[0., 1., 1.], [4., 4., 0.], [2., 3., 1.]])
print(keys)
# Expected: tensor([[1., 0., 2.], [2., 2., 2.], [2., 1., 3.]])
print(querys)
# Expected: tensor([[1., 2., 3.], [2., 8., 0.], [2., 6., 3.]])
print(values)

# Step 4: compute the attention scores (every query dotted with every key).
# Row i holds the scores produced by query i:
# tensor([[ 2.,  4.,  4.],  # attention scores from Query 1
#         [ 4., 16., 12.],  # attention scores from Query 2
#         [ 4., 12., 10.]]) # attention scores from Query 3
attn_scores = torch.matmul(querys, keys.transpose(0, 1))

# Step 5: apply softmax along each row so every query's weights sum to 1.
attn_scores_softmax = softmax(attn_scores, dim=1)
print('attn_scores_softmax:', '\n', attn_scores_softmax)
# tensor([[6.3379e-02, 4.6831e-01, 4.6831e-01],
#         [6.0337e-06, 9.8201e-01, 1.7986e-02],
#         [2.9539e-04, 8.8054e-01, 1.1917e-01]])

# For readability, the computed weights are replaced by the hand-rounded
# approximation below; every later step uses these rounded numbers.
attn_scores_softmax = torch.tensor(
    [
        [0.0, 0.5, 0.5],
        [0.0, 1.0, 0.0],
        [0.0, 0.9, 0.1],
    ]
)

# Step 6: multiply the scores with the values.
# values is viewed as (3, 1, 3) and the transposed weights as (3, 3, 1), so
# weighted_values[i, j] is values[i] scaled by the weight query j gives key i.
weighted_values = values.unsqueeze(1) * attn_scores_softmax.t().unsqueeze(2)
print('weighted_values:', '\n', weighted_values)
# tensor([[[0.0000, 0.0000, 0.0000], [0.0000, 0.0000, 0.0000], [0.0000, 0.0000, 0.0000]],
#         [[1.0000, 4.0000, 0.0000], [2.0000, 8.0000, 0.0000], [1.8000, 7.2000, 0.0000]],
#         [[1.0000, 3.0000, 1.5000], [0.0000, 0.0000, 0.0000], [0.2000, 0.6000, 0.3000]]])

# Step 7: sum the weighted values over the first axis to get one output row
# per query (Output 1, 2, 3):
# tensor([[2.0000, 7.0000, 1.5000], [2.0000, 8.0000, 0.0000], [2.0000, 7.8000, 0.3000]])
outputs = torch.sum(weighted_values, dim=0)
print('outputs:', '\n', outputs)




 

 

你可能感兴趣的:(DL)