torch.nn.Transformer(d_model=512, nhead=8, num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=2048, dropout=0.1, activation='relu', custom_encoder=None, custom_decoder=None)
- d_model – the expected size of the word-embedding vectors in the encoder/decoder inputs (default=512).
- nhead – the number of heads in the multi-head attention models (default=8).
- num_encoder_layers – the number of sub-encoder layers (transformer layers) in the encoder (default=6).
- num_decoder_layers – the number of sub-decoder layers in the decoder (default=6).
- dim_feedforward – the dimension of the feed-forward network model (default=2048).
- dropout – the dropout rate (default=0.1).
- activation – the activation function of the intermediate layer of the encoder/decoder, relu or gelu (default=relu).
- custom_encoder – a custom encoder (default=None).
- custom_decoder – a custom decoder (default=None).
import torch
from torch import nn
transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)
src = torch.rand((10, 32, 512))   # (source length, batch, d_model)
tgt = torch.rand((20, 32, 512))   # (target length, batch, d_model)
out = transformer_model(src, tgt)
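In sequence-to-sequence training the decoder side normally also needs a causal (subsequent) mask so that target position i cannot attend to later positions. A minimal sketch building on the example above; the sizes are the same illustrative ones:

import torch
from torch import nn

transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)
src = torch.rand((10, 32, 512))   # (source length, batch, d_model)
tgt = torch.rand((20, 32, 512))   # (target length, batch, d_model)

# Square causal mask: target position i may only attend to positions <= i.
tgt_mask = transformer_model.generate_square_subsequent_mask(tgt.size(0))
out = transformer_model(src, tgt, tgt_mask=tgt_mask)
print(out.shape)  # torch.Size([20, 32, 512])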
torch.nn.TransformerEncoder(encoder_layer, num_layers, norm=None)
- encoder_layer – an instance of TransformerEncoderLayer() (required).
- num_layers – the number of sub-encoder layers (transformer layers) in the encoder (required).
- norm – the layer-normalization component (optional).
import torch
from torch import nn
encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
src = torch.rand(10, 32, 512)
out = transformer_encoder(src)
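Real batches are usually padded to a common length; a boolean src_key_padding_mask of shape (batch, source length) tells the encoder which positions to ignore, and the optional norm argument adds a final layer normalization. A minimal sketch (the masked positions here are arbitrary):

import torch
from torch import nn

encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6,
                                            norm=nn.LayerNorm(512))

src = torch.rand(10, 32, 512)   # (source length, batch, d_model)
# True marks a padded position that attention should skip; here the last two
# time steps of every sequence are treated as padding purely for illustration.
src_key_padding_mask = torch.zeros(32, 10, dtype=torch.bool)
src_key_padding_mask[:, -2:] = True

out = transformer_encoder(src, src_key_padding_mask=src_key_padding_mask)
print(out.shape)  # torch.Size([10, 32, 512])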
torch.nn.TransformerDecoder(decoder_layer, num_layers, norm=None)
- decoder_layer – an instance of TransformerDecoderLayer() (required).
- num_layers – the number of sub-decoder layers in the decoder (required).
- norm – the layer-normalization component (optional).
import torch
from torch import nn
decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8)
transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=6)
memory = torch.rand(10, 32, 512)   # output of the encoder: (source length, batch, d_model)
tgt = torch.rand(20, 32, 512)      # (target length, batch, d_model)
out = transformer_decoder(tgt, memory)
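For autoregressive decoding a square causal mask is passed as tgt_mask so that a target position cannot attend to positions after it. A minimal sketch that builds the additive mask by hand:

import torch
from torch import nn

decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8)
transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=6)

memory = torch.rand(10, 32, 512)   # encoder output: (source length, batch, d_model)
tgt = torch.rand(20, 32, 512)      # (target length, batch, d_model)

# Additive causal mask: 0.0 on and below the diagonal, -inf strictly above it.
seq_len = tgt.size(0)
tgt_mask = torch.triu(torch.full((seq_len, seq_len), float('-inf')), diagonal=1)

out = transformer_decoder(tgt, memory, tgt_mask=tgt_mask)
print(out.shape)  # torch.Size([20, 32, 512])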
torch.nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward=2048, dropout=0.1, activation='relu')
- d_model – the expected size of the word-embedding vectors in the encoder/decoder inputs.
- nhead – the number of heads in the multi-head attention models.
- dim_feedforward – the dimension of the feed-forward network model (default=2048).
- dropout – the dropout rate (default=0.1).
- activation – the activation function of the intermediate layer of the encoder/decoder, relu or gelu (default=relu).
import torch
from torch import nn

encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
src = torch.rand(10, 32, 512)
out = encoder_layer(src)
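The constructor arguments map directly onto the parameters listed above; a minimal sketch with non-default (purely illustrative) values:

import torch
from torch import nn

# A single layer with non-default hyper-parameters; the values are illustrative.
encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8,
                                           dim_feedforward=1024,
                                           dropout=0.2,
                                           activation='gelu')
src = torch.rand(10, 32, 512)   # (source length, batch, d_model)
out = encoder_layer(src)
print(out.shape)  # the layer preserves the input shape: torch.Size([10, 32, 512])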
torch.nn.TransformerDecoderLayer(d_model, nhead, dim_feedforward=2048, dropout=0.1, activation='relu')
- d_model – the expected size of the word-embedding vectors in the encoder/decoder inputs.
- nhead – the number of heads in the multi-head attention models.
- dim_feedforward – the dimension of the feed-forward network model (default=2048).
- dropout – the dropout rate (default=0.1).
- activation – the activation function of the intermediate layer of the encoder/decoder, relu or gelu (default=relu).
import torch
from torch import nn

decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8)
memory = torch.rand(10, 32, 512)
tgt = torch.rand(20, 32, 512)
out = decoder_layer(tgt, memory)
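A single decoder layer also accepts masks; memory_key_padding_mask, of shape (batch, source length), hides padded encoder positions from the cross-attention. A minimal sketch (the masked positions are arbitrary):

import torch
from torch import nn

decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8)
memory = torch.rand(10, 32, 512)   # encoder output: (source length, batch, d_model)
tgt = torch.rand(20, 32, 512)      # (target length, batch, d_model)

# True marks encoder positions (e.g. padding) that cross-attention should ignore;
# here the last three source positions of every sequence are masked for illustration.
memory_key_padding_mask = torch.zeros(32, 10, dtype=torch.bool)
memory_key_padding_mask[:, -3:] = True

out = decoder_layer(tgt, memory, memory_key_padding_mask=memory_key_padding_mask)
print(out.shape)  # torch.Size([20, 32, 512])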
import torch
from torch import nn
import torch.nn.functional as F
import math

class PositionalEncoding(nn.Module):
    "Implement the PE function."
    def __init__(self, d_model=300, dropout=0.2, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0., max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0., d_model, 2) *
                             -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)              # (1, max_len, d_model)
        self.register_buffer('pe', pe)    # saved with the module but not a trainable parameter

    def forward(self, x):
        # x: (batch, sequence length, d_model); the buffer is sliced to the
        # current sequence length and broadcast over the batch dimension.
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)
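A quick sanity check of the module above, reusing the imports from the snippet: embed a dummy batch, add the positional encoding, and confirm that the shape is unchanged (the vocabulary size and batch shape are illustrative).

embedding = nn.Embedding(1000, 300)        # illustrative vocabulary of 1000 tokens, d_model=300
pos_encoder = PositionalEncoding(d_model=300, dropout=0.2)

tokens = torch.randint(0, 1000, (32, 50))  # (batch, sequence length) of token ids
x = embedding(tokens)                      # (32, 50, 300)
x = pos_encoder(x)                         # positional information added, shape unchanged
print(x.shape)                             # torch.Size([32, 50, 300])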
class Transformer(nn.Module):
    def __init__(self, vocab_size, label_size, mode='gru', bidirectional=True,
                 cuda=True, is_training=True, intent_size=26):
        super(Transformer, self).__init__()
        self.is_training = is_training
        embedding_dim = 300
        hidden_size = 300
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.GRU(input_size=embedding_dim,
                          hidden_size=hidden_size,
                          bidirectional=False,
                          batch_first=True)
        self.fc_slot = nn.Linear(hidden_size, label_size)      # per-token slot classifier
        self.fc_intent = nn.Linear(hidden_size, intent_size)   # sentence-level intent classifier
        self.position = PositionalEncoding()
        encoder_layer = nn.TransformerEncoderLayer(d_model=300, nhead=4)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)

    def forward(self, X):
        # X: (batch, sequence length) of token ids
        embed = self.embedding(X)            # (batch, seq, 300)
        embed = self.position(embed)         # add positional information
        # nn.TransformerEncoder expects (seq, batch, d_model), so transpose around it.
        embed = self.transformer_encoder(embed.transpose(0, 1)).transpose(0, 1)
        _, intent_hidden = self.rnn(embed)   # final GRU hidden state: (1, batch, 300)
        # Assumed completion: project the encoder outputs to per-token slot logits
        # and the final GRU state to sentence-level intent logits.
        slot_logits = self.fc_slot(embed)                          # (batch, seq, label_size)
        intent_logits = self.fc_intent(intent_hidden.squeeze(0))   # (batch, intent_size)
        return slot_logits, intent_logits
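Assuming the forward pass sketched above (slot logits per token plus one intent vector per sentence), a dummy run of the model looks like this; all sizes are illustrative.

model = Transformer(vocab_size=1000, label_size=10)   # 1000-token vocabulary, 10 slot labels

tokens = torch.randint(0, 1000, (32, 50))             # (batch, sequence length)
slot_logits, intent_logits = model(tokens)
print(slot_logits.shape)    # torch.Size([32, 50, 10])  per-token slot scores
print(intent_logits.shape)  # torch.Size([32, 26])      per-sentence intent scores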
Link: official documentation