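"""
1. The model is a Seq2Seq model.
2. Its input is a sequence of integers; the label is the same sequence with the integer 1 (EOS) appended.
3. For example: input=[3,2,1,4], label=[3,2,1,4,1]
"""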
import numpy as np
import tensorflow as tf
import functools
# Parameters
# Reserved token ids.
# NOTE(review): PAD is not referenced by name in the visible code; gen_batch
# pads with literal zeros, which matches this value.
PAD = 0
# EOS is prepended to the decoder inputs and appended to the decoder targets
# in next_feed.
EOS = 1
# embedding parameters
# vocab_size is the number of rows of the embedding table and the width of
# the decoder's output projection / one-hot targets.
vocab_size = 10
input_embedding_size = 20
# network parameters
encoder_hidden_units = 20
decoder_hidden_units = 20
# training parameters
batch_size = 100
max_batches = 3001 # number of batches to run during training
# The training loop prints progress every batches_in_epoch batches.
batches_in_epoch = 1000
# Helper functions
def gen_batch(inputs, max_seq_length=None):
    """Zero-pad a batch of integer sequences and return it time-major.

    Parameters
    ----------
    inputs : sequence of sequences of int
        One entry per sample; samples may have different lengths.
    max_seq_length : int, optional
        Length every sequence is padded to. Defaults to the length of the
        longest sequence in ``inputs`` (0 for an empty batch).

    Returns
    -------
    inputs_seq_major : numpy.ndarray, dtype int32
        Array of shape ``(max_seq_length, len(inputs))``: axis 0 is the
        position inside the sequence, axis 1 is the sample index.
    max_seq_length : int
        The padded length that was actually used.

    Raises
    ------
    IndexError
        If a sequence is longer than an explicitly given ``max_seq_length``.
    """
    sequence_lengths = [len(seq) for seq in inputs]
    batch_size = len(inputs)
    if max_seq_length is None:
        # default=0 keeps an empty batch from crashing inside max().
        max_seq_length = max(sequence_lengths, default=0)
    elif any(length > max_seq_length for length in sequence_lengths):
        # The element-wise copy used previously failed here with an opaque
        # out-of-bounds IndexError; keep the exception type, add a message.
        raise IndexError(
            'sequence of length {} does not fit into max_seq_length={}'.format(
                max(sequence_lengths), max_seq_length))

    # Batch-major buffer: one row per sample, zeros act as padding.
    inputs_batch_major = np.zeros(shape=[batch_size, max_seq_length],
                                  dtype=np.int32)
    for row, seq in enumerate(inputs):
        inputs_batch_major[row, :len(seq)] = seq

    # Move the sequence position to axis 0 (time-major layout).
    inputs_seq_major = inputs_batch_major.swapaxes(0, 1)
    return inputs_seq_major, max_seq_length
def random_sequences(length_from, length_to, vocab_lower, vocab_upper, batch_size):
    """Return an endless iterator over random batches of integer sequences.

    Every item produced is a list of ``batch_size`` Python lists of ints.

    Parameters
    ----------
    length_from, length_to : int
        Sequence lengths are drawn from ``[length_from, length_to]``
        (both ends inclusive).
    vocab_lower, vocab_upper : int
        Values are drawn from ``[vocab_lower, vocab_upper)``. The upper bound
        is exclusive because it is passed as ``high`` to
        ``np.random.randint``.
    batch_size : int
        Number of sequences per batch.

    Raises
    ------
    ValueError
        If ``length_from > length_to``. Raised when this function is called,
        not on the first ``next()``.
    """
    if length_from > length_to:
        raise ValueError('length_from > length_to')

    def random_length():
        """Draw one sequence length from [length_from, length_to]."""
        if length_from == length_to:
            return length_from
        return np.random.randint(length_from, length_to + 1)

    def batch_stream():
        """Yield random batches forever."""
        while True:
            yield [
                np.random.randint(low=vocab_lower,
                                  high=vocab_upper,
                                  size=random_length()).tolist()
                for _ in range(batch_size)
            ]

    # Validation above runs eagerly; only the sampling itself is lazy.
    return batch_stream()
def next_feed(batches):
    """Build the feed_dict for one training step.

    Pulls the next batch from the ``batches`` iterator and converts it with
    ``gen_batch`` into the three arrays fed to the graph placeholders:

    * encoder inputs  - the raw sequences,
    * decoder inputs  - each sequence with EOS prepended,
    * decoder targets - each sequence with EOS appended.

    Returns a dict mapping the placeholders to those arrays.
    """
    raw_batch = next(batches)

    with_leading_eos = [[EOS] + seq for seq in raw_batch]
    with_trailing_eos = [seq + [EOS] for seq in raw_batch]

    # gen_batch also returns the padded length, which is not needed here.
    enc_in = gen_batch(raw_batch)[0]
    dec_in = gen_batch(with_leading_eos)[0]
    dec_target = gen_batch(with_trailing_eos)[0]

    feed = {}
    feed[encoder_inputs] = enc_in
    feed[decoder_inputs] = dec_in
    feed[decoder_targets] = dec_target
    return feed
# Build the computation graph
# All placeholders are time-major, i.e. (seq_len, batch_size): gen_batch
# returns its arrays with the sequence position on axis 0 and both RNNs below
# run with time_major=True.
# encoder_inputs: (seq_len, batch_size)
encoder_inputs = tf.placeholder(shape=[None, None], dtype=tf.int32,
                                name='encoder_inputs')
# decoder_inputs: (seq_len + 1, batch_size), EOS prepended
decoder_inputs = tf.placeholder(shape=[None, None], dtype=tf.int32,
                                name='decoder_inputs')
# decoder_targets: (seq_len + 1, batch_size), EOS appended
decoder_targets = tf.placeholder(shape=[None, None], dtype=tf.int32,
                                 name='decoder_targets')

# Embedding table shared by encoder and decoder, initialised uniformly in
# [-1, 1). The previous tf.random_normal(shape, -1., 1.) call passed -1 and 1
# positionally as mean and stddev, i.e. it sampled from N(-1, 1).
lookup_table = tf.Variable(
    tf.random_uniform([vocab_size, input_embedding_size], -1.0, 1.0),
    dtype=tf.float32)
# *_inputs_embedded: (seq_len, batch_size, input_embedding_size)
encoder_inputs_embedded = tf.nn.embedding_lookup(lookup_table, encoder_inputs)
decoder_inputs_embedded = tf.nn.embedding_lookup(lookup_table, decoder_inputs)

# Encoder
encoder_cell = tf.contrib.rnn.LSTMCell(encoder_hidden_units)
encoder_outputs, encoder_final_state = tf.nn.dynamic_rnn(
    encoder_cell,
    encoder_inputs_embedded,
    dtype=tf.float32,
    time_major=True)  # axis 0 of the input is time (seq_len)
# Only the final state is handed to the decoder; drop the per-step outputs.
del encoder_outputs

# Decoder, started from the encoder's final state
decoder_cell = tf.contrib.rnn.LSTMCell(decoder_hidden_units)
decoder_outputs, decoder_final_state = tf.nn.dynamic_rnn(
    decoder_cell,
    decoder_inputs_embedded,
    initial_state=encoder_final_state,
    dtype=tf.float32,
    time_major=True,
    scope="plain_decoder")

# Project every decoder output onto the vocabulary and take the arg-max token.
decoder_logits = tf.contrib.layers.linear(decoder_outputs, vocab_size)
decoder_prediction = tf.argmax(decoder_logits, 2)

# Per-step cross entropy against one-hot targets, averaged over all steps and
# samples. Padded positions are not masked, so they contribute to the loss
# with target class 0.
stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=tf.one_hot(decoder_targets, depth=vocab_size, dtype=tf.float32),
    logits=decoder_logits)
loss = tf.reduce_mean(stepwise_cross_entropy)
train_op = tf.train.AdamOptimizer().minimize(loss)
# Generate data
# Endless stream of random training batches with sequence lengths 3..8.
# vocab_upper is forwarded as the exclusive `high` of np.random.randint, so
# tokens are 2..9: ids 0 (PAD) and 1 (EOS) never appear in the data and every
# token stays below vocab_size.
batches = random_sequences(length_from=3, length_to=8,
vocab_lower=2, vocab_upper=10,
batch_size=batch_size)
# Training
# Train for max_batches steps; every batches_in_epoch steps report the loss
# and the first three predictions on the batch that was just trained on.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    loss_track = []  # training loss of every step, in order
    for batch in range(max_batches):
        fd = next_feed(batches)
        _, l = sess.run([train_op, loss], fd)
        loss_track.append(l)
        # batch == 0 is already covered by the modulo test.
        if batch % batches_in_epoch == 0:
            # Evaluate loss and predictions after the update in one forward
            # pass instead of two separate sess.run calls on the same feed.
            minibatch_loss, predict_ = sess.run([loss, decoder_prediction], fd)
            print('batch {}'.format(batch))
            print(' minibatch loss: {}'.format(minibatch_loss))
            # Arrays are time-major; transpose to iterate sample by sample.
            samples = zip(fd[encoder_inputs].T[:3], predict_.T[:3])
            for i, (inp, pred) in enumerate(samples, start=1):
                print(' sample {}:'.format(i))
                print(' input > {}'.format(inp))
                print(' predicted > {}'.format(pred))
batch 0
minibatch loss: 2.1847686767578125
sample 1:
input > [9 2 3 0 0 0 0 0]
predicted > [8 0 0 0 0 0 0 0 0]
sample 2:
input > [2 6 3 6 6 0 0 0]
predicted > [9 5 9 0 0 0 0 0 0]
sample 3:
input > [5 3 8 4 6 9 0 0]
predicted > [8 4 8 4 0 0 0 0 0]
batch 1000
minibatch loss: 0.32022926211357117
sample 1:
input > [5 8 3 7 0 0 0 0]
predicted > [5 8 3 7 1 0 0 0 0]
sample 2:
input > [3 9 7 0 0 0 0 0]
predicted > [9 9 7 1 0 0 0 0 0]
sample 3:
input > [5 7 2 0 0 0 0 0]
predicted > [5 7 2 1 0 0 0 0 0]
batch 2000
minibatch loss: 0.1575092226266861
sample 1:
input > [2 7 3 8 8 7 0 0]
predicted > [2 7 8 8 8 7 1 0 0]
sample 2:
input > [7 5 9 8 8 0 0 0]
predicted > [7 5 8 8 8 1 0 0 0]
sample 3:
input > [4 8 6 2 5 2 5 7]
predicted > [4 8 6 2 5 2 5 7 1]
batch 3000
minibatch loss: 0.1251118779182434
sample 1:
input > [5 7 5 5 0 0 0 0]
predicted > [5 7 5 5 1 0 0 0 0]
sample 2:
input > [7 2 9 6 5 6 9 0]
predicted > [7 2 9 6 5 6 9 1 0]
sample 3:
input > [3 2 3 9 9 4 9 0]
predicted > [3 2 3 9 9 4 2 1 0]