initrange = 0.5 / self.embed_size
self.out_embed = nn.Embedding(self.vocab_size, self.embed_size, sparse=False)
self.out_embed.weight.data.uniform_(-initrange, initrange)  # one common way to initialize embedding weights
def input_embeddings(self):  # return the learned weights of self.in_embed
return self.in_embed.weight.data.cpu().numpy()
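The lines above come from a skip-gram style embedding model. A minimal sketch of how they fit together, assuming the class also defines an input embedding self.in_embed symmetrically (the class name and constructor arguments are assumptions):

import torch
import torch.nn as nn

class EmbeddingModel(nn.Module):
    # sketch of a skip-gram embedding model
    def __init__(self, vocab_size, embed_size):
        super().__init__()
        self.vocab_size = vocab_size
        self.embed_size = embed_size
        initrange = 0.5 / self.embed_size
        # input (center word) and output (context word) embedding tables
        self.in_embed = nn.Embedding(self.vocab_size, self.embed_size, sparse=False)
        self.in_embed.weight.data.uniform_(-initrange, initrange)
        self.out_embed = nn.Embedding(self.vocab_size, self.embed_size, sparse=False)
        self.out_embed.weight.data.uniform_(-initrange, initrange)

    def input_embeddings(self):
        # return the input embedding matrix as a numpy array for evaluation
        return self.in_embed.weight.data.cpu().numpy()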
USE_CUDA = torch.cuda.is_available()
if USE_CUDA:
    model = model.cuda()
sklearn.metrics.pairwise.cosine_similarity(word1_embed, word2_embed)  # cosine similarity between two word vectors
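A hedged sketch of comparing two words with the trained embeddings; word_to_idx and the helper name are assumptions:

from sklearn.metrics.pairwise import cosine_similarity

embedding_weights = model.input_embeddings()  # (vocab_size, embed_size) numpy array

def word_similarity(w1, w2, word_to_idx, weights=embedding_weights):
    # cosine similarity between the vectors of two words
    v1 = weights[word_to_idx[w1]].reshape(1, -1)
    v2 = weights[word_to_idx[w2]].reshape(1, -1)
    return cosine_similarity(v1, v2)[0, 0]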
torch.save(model.state_dict(), "embedding-{}.th".format(EMBEDDING_SIZE))
model.load_state_dict(torch.load("embedding-{}.th".format(EMBEDDING_SIZE)))
Call self.init_weights() inside __init__():
def init_weights(self):
    initrange = 0.1
    self.encoder.weight.data.uniform_(-initrange, initrange)
    self.decoder.bias.data.zero_()
    self.decoder.weight.data.uniform_(-initrange, initrange)
def init_hidden(self, bsz, requires_grad=True):
    weight = next(self.parameters())  # any parameter, only used to create zeros with the right dtype/device
    if self.rnn_type == 'LSTM':
        return (weight.new_zeros((self.nlayers, bsz, self.nhid), requires_grad=requires_grad),
                weight.new_zeros((self.nlayers, bsz, self.nhid), requires_grad=requires_grad))
    else:
        return weight.new_zeros((self.nlayers, bsz, self.nhid), requires_grad=requires_grad)
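init_weights and init_hidden belong to a word-level RNN language model. A minimal sketch of the surrounding class, following the structure of PyTorch's word_language_model example (constructor arguments and the dropout value are assumptions):

class RNNModel(nn.Module):
    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5):
        super().__init__()
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)               # word embedding layer
        self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)  # 'LSTM' or 'GRU'
        self.decoder = nn.Linear(nhid, ntoken)                  # project hidden states back onto the vocabulary
        self.rnn_type, self.nhid, self.nlayers = rnn_type, nhid, nlayers
        self.init_weights()                                     # init_weights / init_hidden as defined above

    def forward(self, input, hidden):
        emb = self.drop(self.encoder(input))                    # (seq_len, batch, ninp)
        output, hidden = self.rnn(emb, hidden)
        output = self.drop(output)
        decoded = self.decoder(output.view(-1, output.size(2)))
        return decoded.view(output.size(0), output.size(1), -1), hidden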
def repackage_hidden(h):
    # detach hidden states from their history so gradients do not flow back across batches
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
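repackage_hidden detaches the hidden state so backpropagation does not reach across batch boundaries. A hedged training-loop fragment showing where it is called (train_iter, loss_fn, BATCH_SIZE, VOCAB_SIZE and GRAD_CLIP are assumptions):

hidden = model.init_hidden(BATCH_SIZE)
for data, target in train_iter:
    hidden = repackage_hidden(hidden)       # truncate backprop at the batch boundary
    optimizer.zero_grad()
    output, hidden = model(data, hidden)
    loss = loss_fn(output.view(-1, VOCAB_SIZE), target.view(-1))
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP)
    optimizer.step()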
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.5)
At the end of each epoch, save the model if it performs better on the validation set; otherwise call scheduler.step() to decay the learning rate:
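A sketch of that epoch-end check; evaluate, val_iter and the val_losses list are assumed to exist, and the checkpoint filename is hypothetical:

val_loss = evaluate(model, val_iter, loss_fn)        # average validation loss
if len(val_losses) == 0 or val_loss < min(val_losses):
    torch.save(model.state_dict(), "lm-best.th")     # model improved: keep this checkpoint
else:
    scheduler.step()                                 # no improvement: multiply the lr by gamma=0.5
val_losses.append(val_loss)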
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
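Typical usage of this helper:

print("The model has {:,} trainable parameters".format(count_parameters(model)))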
pretrained_embedding = TEXT.vocab.vectors  # pretrained GloVe vectors loaded through torchtext
model.embed.weight.data.copy_(pretrained_embedding)
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]
model.embed.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_SIZE)
model.embed.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_SIZE)
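For context, TEXT.vocab.vectors is typically produced by building a torchtext vocabulary with pretrained GloVe vectors. A minimal sketch with the legacy torchtext API (the IMDB dataset and the parameters are assumptions; older torchtext versions import data/datasets directly from torchtext):

from torchtext.legacy import data, datasets

TEXT = data.Field(tokenize='spacy')
LABEL = data.LabelField(dtype=torch.float)
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
TEXT.build_vocab(train_data, max_size=25000,
                 vectors="glove.6B.100d",            # vector dimension must match EMBEDDING_SIZE
                 unk_init=torch.Tensor.normal_)
LABEL.build_vocab(train_data)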
torch.round(torch.sigmoid(preds))  # turn sigmoid probabilities into 0/1 predictions
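A sketch of how this rounding is usually wrapped into a binary-accuracy metric (the function name is an assumption):

def binary_accuracy(preds, y):
    rounded_preds = torch.round(torch.sigmoid(preds))  # probabilities -> 0/1 predictions
    correct = (rounded_preds == y).float()
    return correct.sum() / len(correct)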
torch.nn.utils.clip_grad_norm_(model.parameters(), 5.)  # clip gradients so they do not grow too large
with torch.no_grad():  # disable gradient tracking during evaluation
model.eval()   # switch to evaluation mode (disables dropout)
model.train()  # switch back to training mode
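Putting the three lines above together, a sketch of an evaluation routine for the classifier (batch.text / batch.label assume a torchtext iterator; loss_fn is assumed):

def evaluate(model, data_iter, loss_fn):
    model.eval()                                  # evaluation mode: dropout off
    total_loss, total_count = 0., 0.
    with torch.no_grad():                         # no graph is built, saving memory
        for batch in data_iter:
            preds = model(batch.text).squeeze(1)
            loss = loss_fn(preds, batch.label)
            total_loss += loss.item() * len(batch.label)
            total_count += len(batch.label)
    model.train()                                 # switch back before resuming training
    return total_loss / total_count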