#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date : 2019-02-26 14:15:49
# @Author : cdl ([email protected])
# @Link : https://github.com/cdlwhm1217096231/python3_spider
# @Version : $Id$
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
"""
1. Basic Embedding Model
1-1. NNLM (Neural Network Language Model)
"""
dtype = torch.FloatTensor
sentences = ["i like dog", "i love coffee", "i hate milk"]
word_list = " ".join(sentences).split()  # build the raw vocabulary
print(word_list)
word_list = list(set(word_list))  # remove duplicate words
print("word_list after deduplication:", word_list)
word_dict = {w: i for i, w in enumerate(word_list)}  # word -> index
number_dict = {i: w for i, w in enumerate(word_list)}  # index -> word
n_class = len(word_dict)  # vocabulary size
# NNLM parameters
n_step = 2  # predict the 3rd word from the previous two words
n_hidden = 2  # number of hidden units
m = 2  # dimensionality of the word embeddings
# PyTorch models consume data in mini-batches; this function turns the raw
# sentences into one batch of (context indices, target index) pairs
# (see the example after the function).
def make_batch(sentences):
    input_batch = []
    target_batch = []
    for sentence in sentences:
        word = sentence.split()
        input = [word_dict[w] for w in word[:-1]]
        target = word_dict[word[-1]]
        input_batch.append(input)
        target_batch.append(target)
    return input_batch, target_batch
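# For the index assignment logged in the run below (coffee=0, i=1, hate=2, dog=3,
# love=4, milk=5, like=6; the exact numbers depend on set() ordering), make_batch
# returns:
#   input_batch  = [[1, 6], [1, 4], [1, 2]]   # ("i","like"), ("i","love"), ("i","hate")
#   target_batch = [3, 0, 5]                  # "dog", "coffee", "milk"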
# Model
class NNLM(nn.Module):
    def __init__(self):
        super(NNLM, self).__init__()
        self.C = nn.Embedding(n_class, embedding_dim=m)  # word embedding table
        self.H = nn.Parameter(torch.randn(n_step * m, n_hidden).type(dtype))  # input -> hidden
        self.W = nn.Parameter(torch.randn(n_step * m, n_class).type(dtype))  # direct input -> output
        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))  # hidden bias
        self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))  # hidden -> output
        self.b = nn.Parameter(torch.randn(n_class).type(dtype))  # output bias

    def forward(self, x):
        x = self.C(x)  # [batch_size, n_step, m]
        x = x.view(-1, n_step * m)
        # x: [batch_size, n_step * m]
        tanh = torch.tanh(self.d + torch.mm(x, self.H))
        # tanh: [batch_size, n_hidden]
        output = self.b + torch.mm(x, self.W) + torch.mm(tanh, self.U)
        # output: [batch_size, n_class]
        return output
model = NNLM()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
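# Note: nn.CrossEntropyLoss applies log-softmax internally, so forward() returns
# raw scores (logits) rather than probabilities.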
# build the input and target batches
input_batch, target_batch = make_batch(sentences)
input_batch = Variable(torch.LongTensor(input_batch))
target_batch = Variable(torch.LongTensor(target_batch))
# Training
for epoch in range(5000):
    optimizer.zero_grad()
    output = model(input_batch)
    # output: [batch_size, n_class], target_batch: [batch_size] (LongTensor, not one-hot)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 1000 == 0:
        print("Epoch:{}".format(epoch + 1), "Loss:{:.3f}".format(loss.item()))
    loss.backward()
    optimizer.step()
# Prediction
predict = model(input_batch).data.max(
    1, keepdim=True)[1]  # [batch_size, 1]
print("predict: \n", predict)
# Test
print([sentence.split()[:2] for sentence in sentences], "---->",
      [number_dict[n.item()] for n in predict.squeeze()])
['i', 'like', 'dog', 'i', 'love', 'coffee', 'i', 'hate', 'milk']
word_list after deduplication: ['coffee', 'i', 'hate', 'dog', 'love', 'milk', 'like']
Epoch:1000 Loss:0.114
Epoch:2000 Loss:0.021
Epoch:3000 Loss:0.007
Epoch:4000 Loss:0.003
Epoch:5000 Loss:0.002
predict:
tensor([[3],
        [0],
        [5]])
[['i', 'like'], ['i', 'love'], ['i', 'hate']] ----> ['dog', 'coffee', 'milk']
[Finished in 4.5s]
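As a quick follow-up sketch (reusing the model, word_dict, and number_dict defined above), the trained PyTorch model can be queried for a single two-word prefix like this:

with torch.no_grad():
    prefix = torch.LongTensor([[word_dict["i"], word_dict["like"]]])  # [1, n_step]
    pred = model(prefix).argmax(dim=1)                                # [1]
    print(number_dict[pred.item()])                                   # prints "dog" for the run above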
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date : 2019-02-26 21:25:01
# @Author : cdl ([email protected])
# @Link : https://github.com/cdlwhm1217096231/python3_spider
# @Version : $Id$
import numpy as np
import tensorflow as tf
tf.reset_default_graph()
sentences = ["i like coffee", "i love curry", "i hate apple"]
word_list = " ".join(sentences).split()
word_list = list(set(word_list))
print(word_list)
word_dict = {w: i for i, w in enumerate(word_list)}
number_dict = {i: w for i, w in enumerate(word_list)}
n_class = len(word_dict)
# Model parameters
n_step = 2
n_hidden = 5
def make_batch(sentences):
    input_batch = []
    target_batch = []
    for sentence in sentences:
        words = sentence.split()
        input = [word_dict[word] for word in words[:-1]]
        target = word_dict[words[-1]]
        input_batch.append(np.eye(n_class)[input])  # np.eye() is the identity matrix; indexing its rows gives one-hot vectors
        target_batch.append(np.eye(n_class)[target])
    return input_batch, target_batch
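# Unlike the PyTorch version, the inputs here are one-hot encoded: each element of
# input_batch has shape [n_step, n_class] (one one-hot row per context word) and
# each element of target_batch is a single [n_class] one-hot vector.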
# Model
# [batch_size, number of steps, number of Vocabulary]
X = tf.placeholder(tf.float32, [None, n_step, n_class])
Y = tf.placeholder(tf.float32, [None, n_class])
# [batch_size, n_step * n_class]
input = tf.reshape(X, shape=[-1, n_step * n_class])
H = tf.Variable(tf.random_normal([n_step * n_class, n_hidden]))
d = tf.Variable(tf.random_normal([n_hidden]))
U = tf.Variable(tf.random_normal([n_hidden, n_class]))
b = tf.Variable(tf.random_normal([n_class]))
tanh = tf.nn.tanh(d + tf.matmul(input, H)) # [batch_size, n_hidden]
output = tf.matmul(tanh, U) + b # [batch_size, n_class]
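# Note: unlike the PyTorch model above, this graph omits the direct
# input-to-output term (input @ W) and keeps only b + tanh(d + input @ H) @ U.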
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=Y))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)
prediction = tf.argmax(output, 1)
# Training
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    input_batch, target_batch = make_batch(sentences)
    for epoch in range(5000):
        _, loss = sess.run([optimizer, cost], feed_dict={
            X: input_batch, Y: target_batch})
        if (epoch + 1) % 1000 == 0:
            print("Epoch:{}".format(epoch + 1), "Cost:{:.4f}".format(loss))
    # Predict
    predict = sess.run([prediction], feed_dict={X: input_batch})
    # Test
    input = [sentence.split()[:2] for sentence in sentences]
    print(input, '---->', [number_dict[n] for n in predict[0]])
['like', 'love', 'apple', 'coffee', 'hate', 'curry', 'i']
Epoch:1000 Cost:0.1147
Epoch:2000 Cost:0.0324
Epoch:3000 Cost:0.0127
Epoch:4000 Cost:0.0057
Epoch:5000 Cost:0.0029
[['i', 'like'], ['i', 'love'], ['i', 'hate']] ----> ['coffee', 'curry', 'apple']
[Finished in 7.0s]
1. Yoshua Bengio, Rejean Ducharme, Pascal Vincent, and Christian Jauvin. A Neural Probabilistic Language Model. Journal of Machine Learning Research (JMLR), 3:1137-1155, 2003.
2. LICSTAR's blog (in Chinese).
3. "A Neural Probabilistic Language Model", CSDN blog post (in Chinese).