【TensorFlow】实现简单的Seq2Seq模型

1.该模型是一个Seq2Seq模型;

2.其input为一个整数序列,label则为该整数序列后面追加整数1;

3.例如input=[3,2,1,4],label=[3,2,1,4,1]

import numpy as np
import tensorflow as tf
import functools

参数

# Reserved token ids.
PAD = 0  # padding id; gen_batch fills unused positions with 0
EOS = 1  # end-of-sequence id; next_feed adds it to decoder inputs and targets

# embedding parameters
vocab_size = 10  # number of rows in the embedding table and width of the logits
input_embedding_size = 20  # length of each embedding vector

# network parameters
encoder_hidden_units = 20  # LSTM units in the encoder cell
decoder_hidden_units = 20  # LSTM units in the decoder cell

# training parameters
batch_size = 100  # sequences per generated batch
max_batches = 3001 # number of training batches to run
batches_in_epoch = 1000  # progress is printed every this many batches

辅助函数

def gen_batch(inputs,max_seq_length=None):
    '''
    Convert a batch of variable-length sequences into a zero-padded,
    time-major numpy array.

    Parameters
    ----------
    inputs : sequence of sequences of int
        One entry per example; entries may differ in length.
    max_seq_length : int, optional
        Number of time steps to pad to. Defaults to the length of the
        longest sequence in ``inputs`` (0 when ``inputs`` is empty).

    Returns
    -------
    inputs_seq_major : np.ndarray of int32, shape (max_seq_length, len(inputs))
        Padded batch with time on axis 0; unused positions hold 0.
    max_seq_length : int
        The padded length that was actually used.

    Raises
    ------
    IndexError
        If a sequence is longer than ``max_seq_length``.
    '''
    sequence_lengths = [len(seq) for seq in inputs]
    num_sequences = len(inputs)
    if max_seq_length is None:
        # default=0 lets an empty batch produce an empty array instead of
        # crashing inside max().
        max_seq_length = max(sequence_lengths, default=0)
    # Batch-major buffer (one row per sequence), pre-filled with padding zeros.
    inputs_batch_major = np.zeros(shape=[num_sequences, max_seq_length], dtype=np.int32)
    for row, (seq, length) in enumerate(zip(inputs, sequence_lengths)):
        if length > max_seq_length:
            raise IndexError(
                'sequence {} has length {} but max_seq_length is {}'.format(
                    row, length, max_seq_length))
        # Copy the whole sequence at once; the tail of the row stays zero.
        inputs_batch_major[row, :length] = seq
    # Move time to axis 0 so the result is (max_seq_length, num_sequences).
    inputs_seq_major = inputs_batch_major.swapaxes(0, 1)
    return inputs_seq_major, max_seq_length
def random_sequences(length_from, length_to, vocab_lower, vocab_upper, batch_size):
    '''
    Build an endless generator of random integer-sequence batches.

    Each yielded batch is a list of ``batch_size`` Python lists. Every list
    has a length drawn from [length_from, length_to] (both ends inclusive)
    and values drawn from [vocab_lower, vocab_upper); the upper bound is
    exclusive because it is passed as ``high`` to ``np.random.randint``.

    The arguments are validated when this function is called rather than on
    the first ``next()``, so a bad range fails at the call site.

    Raises
    ------
    ValueError
        If ``length_from > length_to``.
    '''
    if length_from > length_to:
        raise ValueError('length_from > length_to')

    def random_length():
        '''
        Return a random integer in [length_from, length_to].
        '''
        if length_from == length_to:
            return length_from
        # randint excludes ``high``, hence the + 1.
        return np.random.randint(length_from, length_to + 1)

    def generate():
        # Kept as an inner generator so the check above runs eagerly.
        while True:
            yield [np.random.randint(low=vocab_lower,
                                     high=vocab_upper,
                                     size=random_length()).tolist()
                   for _ in range(batch_size)]

    return generate()
def next_feed(batches):
    '''
    Pull the next batch from ``batches`` and build the feed_dict for one
    training step.

    The encoder receives the raw sequences, the decoder input is each
    sequence with EOS prepended, and the decoder target is each sequence
    with EOS appended. All three go through gen_batch before being fed.
    '''
    sequences = next(batches)

    # Raw sequences for the encoder.
    enc_in = gen_batch(sequences)[0]
    # EOS in front of each sequence for the decoder input.
    dec_in = gen_batch([[EOS] + seq for seq in sequences])[0]
    # EOS after each sequence for the decoder target.
    dec_out = gen_batch([seq + [EOS] for seq in sequences])[0]

    feed = {}
    feed[encoder_inputs] = enc_in
    feed[decoder_inputs] = dec_in
    feed[decoder_targets] = dec_out
    return feed

定义计算图

# All three placeholders are fed time-major arrays: gen_batch swaps the axes
# so that time is axis 0, which matches time_major=True in the dynamic_rnn
# calls.
# encoder_inputs: (max_time, batch_size)
encoder_inputs = tf.placeholder(shape=[None,None], dtype=tf.int32, name='encoder_inputs')
# decoder_inputs: (max_time + 1, batch_size) -- one extra step for the leading EOS
decoder_inputs = tf.placeholder(shape=[None,None], dtype=tf.int32, name='decoder_inputs')
# decoder_targets: (max_time + 1, batch_size) -- one extra step for the trailing EOS
decoder_targets = tf.placeholder(shape=[None,None], dtype=tf.int32, name='decoder_targets')

# embedding
# Initialise uniformly in [-1, 1). tf.random_uniform takes (shape, minval,
# maxval); tf.random_normal would read the same two numbers as mean=-1 and
# stddev=1 and start every embedding centred on -1.
lookup_table = tf.Variable(tf.random_uniform([vocab_size,input_embedding_size],-1.,1.),dtype=tf.float32)
# encoder_inputs_embedded: (max_time, batch_size, input_embedding_size)
encoder_inputs_embedded = tf.nn.embedding_lookup(lookup_table,encoder_inputs)
# decoder_inputs_embedded: (max_time + 1, batch_size, input_embedding_size)
decoder_inputs_embedded = tf.nn.embedding_lookup(lookup_table,decoder_inputs)

# Encoder: one LSTM layer. Its per-step outputs are not needed, so only the
# final state (element 1 of the returned pair) is kept for the decoder.
encoder_cell = tf.contrib.rnn.LSTMCell(encoder_hidden_units)
encoder_final_state = tf.nn.dynamic_rnn(
    cell=encoder_cell,
    inputs=encoder_inputs_embedded,
    dtype=tf.float32,
    time_major=True,  # axis 0 of the input is time (seq_len)
)[1]

# Decoder: one LSTM layer whose initial state is the encoder's final state.
decoder_cell = tf.contrib.rnn.LSTMCell(decoder_hidden_units)
decoder_outputs, decoder_final_state = tf.nn.dynamic_rnn(
    cell=decoder_cell,
    inputs=decoder_inputs_embedded,
    initial_state=encoder_final_state,
    dtype=tf.float32,
    time_major=True,
    scope="plain_decoder",
)

# decoder_outputs is a single Tensor; project every step to vocab_size logits
# and take the arg-max over the vocabulary axis as the predicted token id.
decoder_logits = tf.contrib.layers.linear(decoder_outputs, vocab_size)
decoder_prediction = tf.argmax(decoder_logits, axis=2)

# Cross-entropy per time step against one-hot targets.
stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=tf.one_hot(decoder_targets, depth=vocab_size, dtype=tf.float32),
    logits=decoder_logits,
)

# Average over every step of every sequence (padding positions included).
loss = tf.reduce_mean(stepwise_cross_entropy)
train_op = tf.train.AdamOptimizer().minimize(loss)

生成数据

# Endless generator of training batches: sequences of 3 to 8 tokens whose ids
# are drawn from [2, 10) (np.random.randint excludes the upper bound), so the
# reserved ids 0 and 1 never occur inside the generated data.
batches = random_sequences(length_from=3, length_to=8,
                           vocab_lower=2, vocab_upper=10,
                           batch_size=batch_size)

训练

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Training loss of every batch, in order.
    loss_track = []
    for batch in range(max_batches):
        fd = next_feed(batches)
        _, batch_loss = sess.run([train_op, loss], fd)
        loss_track.append(batch_loss)

        # Report progress on batch 0 and then every batches_in_epoch batches.
        if batch % batches_in_epoch == 0:
            # One run fetches both the post-update loss and the predictions
            # for the batch that was just trained on.
            minibatch_loss, predict_ = sess.run([loss, decoder_prediction], fd)
            print('batch {}'.format(batch))
            print('  minibatch loss: {}'.format(minibatch_loss))
            # Fed and predicted arrays are time-major; transpose so that each
            # row is one sample, and show only the first three.
            samples = zip(fd[encoder_inputs].T[:3], predict_.T[:3])
            for i, (inp, pred) in enumerate(samples, start=1):
                print('  sample {}:'.format(i))
                print('    input     > {}'.format(inp))
                print('    predicted > {}'.format(pred))
batch 0
  minibatch loss: 2.1847686767578125
  sample 1:
    input     > [9 2 3 0 0 0 0 0]
    predicted > [8 0 0 0 0 0 0 0 0]
  sample 2:
    input     > [2 6 3 6 6 0 0 0]
    predicted > [9 5 9 0 0 0 0 0 0]
  sample 3:
    input     > [5 3 8 4 6 9 0 0]
    predicted > [8 4 8 4 0 0 0 0 0]
batch 1000
  minibatch loss: 0.32022926211357117
  sample 1:
    input     > [5 8 3 7 0 0 0 0]
    predicted > [5 8 3 7 1 0 0 0 0]
  sample 2:
    input     > [3 9 7 0 0 0 0 0]
    predicted > [9 9 7 1 0 0 0 0 0]
  sample 3:
    input     > [5 7 2 0 0 0 0 0]
    predicted > [5 7 2 1 0 0 0 0 0]
batch 2000
  minibatch loss: 0.1575092226266861
  sample 1:
    input     > [2 7 3 8 8 7 0 0]
    predicted > [2 7 8 8 8 7 1 0 0]
  sample 2:
    input     > [7 5 9 8 8 0 0 0]
    predicted > [7 5 8 8 8 1 0 0 0]
  sample 3:
    input     > [4 8 6 2 5 2 5 7]
    predicted > [4 8 6 2 5 2 5 7 1]
batch 3000
  minibatch loss: 0.1251118779182434
  sample 1:
    input     > [5 7 5 5 0 0 0 0]
    predicted > [5 7 5 5 1 0 0 0 0]
  sample 2:
    input     > [7 2 9 6 5 6 9 0]
    predicted > [7 2 9 6 5 6 9 1 0]
  sample 3:
    input     > [3 2 3 9 9 4 9 0]
    predicted > [3 2 3 9 9 4 2 1 0]

你可能感兴趣的:(【TensorFlow】实现简单的Seq2Seq模型)