文本处理的前提
文本预处理:
fech
由于训练集和测试集的格式不一样,
训练集为
测试集为
训练集用','
来分割
测试集用'\t'
来分割,代码如下:
train:
def generate_seg_file(input_file, out_seg_file):
    """Segment the comma-separated training file with jieba.

    Each input row is expected to look like ``index,label,content,count``.
    The first row is treated as a header and skipped.  Every other row is
    written to ``out_seg_file`` as ``label<TAB>token token ...``.

    Args:
        input_file: Path of the UTF-8 encoded training file.
        out_seg_file: Path of the segmented file to create.  It is opened
            with the platform default encoding, exactly as before.

    Raises:
        ValueError: If a non-empty row has fewer than four columns.

    NOTE(review): ``jieba`` is not imported in the visible part of this file;
    it has to be in scope when this function is called.
    """
    with open(input_file, 'r', encoding='utf8') as f:
        lines = f.readlines()
    with open(out_seg_file, 'w') as f:
        # Skip the header row.
        for line in lines[1:]:
            line = line.strip('\n')
            if not line:
                continue
            # The content column may itself contain commas, so peel the two
            # leading columns and the trailing count off instead of using a
            # plain split(',') that would then fail to unpack.
            _index, label, rest = line.split(',', 2)
            content, _count = rest.rsplit(',', 1)
            # Drop whitespace-only tokens so that a stray tab can never end up
            # inside the tab-separated output.
            tokens = (word.strip() for word in jieba.cut(content))
            word_content = ' '.join(token for token in tokens if token)
            f.write('%s\t%s\n' % (label, word_content))
# Segment the raw training file.  train_file and seg_train_file are expected to
# be defined by the surrounding script before this line runs.
generate_seg_file(train_file, seg_train_file)
测试集:
def generate_seg_file(input_file, out_seg_file):
    """Segment the tab-separated test file with jieba.

    Each input row is expected to look like ``label<TAB>content``.  There is
    no header row.  Every row is written to ``out_seg_file`` as
    ``label<TAB>token token ...`` in UTF-8.

    Args:
        input_file: Path of the UTF-8 encoded test file (a leading BOM is
            tolerated and dropped).
        out_seg_file: Path of the segmented file to create.

    Raises:
        ValueError: If a non-empty row contains no tab.

    NOTE(review): ``jieba`` is not imported in the visible part of this file;
    it has to be in scope when this function is called.
    """
    # 'utf-8-sig' decodes plain UTF-8 as well, but strips a BOM so it does not
    # get glued onto the first label.
    with open(input_file, 'r', encoding='utf-8-sig') as f:
        lines = f.readlines()
    with open(out_seg_file, 'w', encoding='utf8') as f:
        for line in lines:
            line = line.strip('\n')
            if not line:
                continue
            # Split on the first tab only: everything after it is content.
            label, content = line.split('\t', 1)
            # Drop whitespace-only tokens so that a tab inside the content can
            # never end up inside the tab-separated output.
            tokens = (word.strip() for word in jieba.cut(content))
            word_content = ' '.join(token for token in tokens if token)
            f.write('%s\t%s\n' % (label, word_content))
# Segment the raw test file.  test_file and seg_test_file are expected to be
# defined by the surrounding script before this line runs.
generate_seg_file(test_file, seg_test_file)
def generare_category_dcit(input_file, categegory_file):
    """Collect the distinct labels of the training file.

    The input is the comma-separated training file whose rows look like
    ``index,label,content,count`` and whose first row is a header.  One label
    per line is written to ``categegory_file`` in order of first appearance,
    and ``label<TAB>count`` is printed for every label.

    Args:
        input_file: Path of the UTF-8 encoded training file.
        categegory_file: Path of the category list to create (platform
            default encoding, as before).

    Raises:
        ValueError: If a non-empty row has fewer than three columns.
    """
    from collections import Counter

    category_counts = Counter()
    with open(input_file, 'r', encoding='utf8') as f:
        next(f, None)  # skip the header row
        for line in f:
            line = line.strip('\r\n')
            if not line:
                continue
            # Only the second column is needed.  Limiting the split keeps
            # commas inside the content column from breaking the unpacking.
            _index, label, _rest = line.split(',', 2)
            category_counts[label] += 1
    with open(categegory_file, 'w') as f:
        for category, count in category_counts.items():
            print('{0}\t{1}'.format(category, count))
            f.write('%s\n' % category)
# Write the list of labels found in the training file to category_file.
generare_category_dcit(train_file, category_file)
利用已有的、分词后的训练集来构建词表。
词表第一行的特殊标记(未登录词,UNK)用来表示没有出现在训练集词表中、却出现在测试集里的词。
def generate_vocab_file(input_seg_file, output_vocab_file):
    """Build a frequency-sorted vocabulary file from a segmented file.

    The input rows look like ``label<TAB>token token ...``.  The output
    starts with a sentinel row whose word is empty and whose count is
    1000000 (the ``Vocab`` class in this file treats the empty word as the
    unknown-word entry).  It is followed by one ``word<TAB>count`` row per
    distinct token, most frequent first.  Ties keep their order of first
    appearance.

    Args:
        input_seg_file: Path of the segmented file (platform default
            encoding, as before).
        output_vocab_file: Path of the vocabulary file to create.

    Raises:
        ValueError: If a non-empty row contains no tab.
    """
    from collections import Counter

    word_counts = Counter()
    with open(input_seg_file, 'r') as f:
        for line in f:
            line = line.strip('\r\n')
            if not line:
                # A blank (for example trailing) line carries no sample.
                continue
            # Split on the first tab only; the rest of the row is content.
            _label, content = line.split('\t', 1)
            word_counts.update(content.split())
    with open(output_vocab_file, 'w') as f:
        f.write('\t1000000\n')
        # most_common() is a stable descending sort by count, the same
        # ordering the explicit sorted(..., reverse=True) call produced.
        for word, count in word_counts.most_common():
            f.write('%s\t%d\n' % (word, count))
# Build the vocabulary file from the segmented training file.
generate_vocab_file(seg_train_file, vocab_file)
在构建神经网络之前,先逐步检查生成的各个文件格式是否正确,以防止后面出错。
import tensorflow as tf
import os
import sys
import numpy as np
import math
# Emit INFO-level messages logged through tf.logging.
tf.logging.set_verbosity(tf.logging.INFO)
def get_default_params():
    """Return the default hyper-parameters of the LSTM classifier.

    Returns:
        A ``tf.contrib.training.HParams`` instance carrying the values below.
    """
    defaults = {
        # Width of each word embedding vector.
        'num_embedding_size': 16,
        # Number of token ids kept per sample.
        'num_timesteps': 50,
        # Units of each stacked LSTM layer.
        'num_lstm_nodes': [32, 32],
        # Number of stacked LSTM layers (spelling is what create_model reads).
        'num_lstm_laysers': 2,
        # Units of the hidden dense layer.
        'num_fc_nodes': 32,
        'batch_size': 100,
        # Global-norm bound used when clipping gradients.
        'clip_lstm_grads': 1.0,
        'learning_rate': 0.001,
        # Minimum frequency a word needs to enter the vocabulary.
        'num_word_threshold': 10,
    }
    return tf.contrib.training.HParams(**defaults)


# Hyper-parameter instance shared by the rest of the script.
hps = get_default_params()
# Locations of the data files and of the run output directory.
# The paths are assembled with os.path.join instead of hand-written
# '.\deep_learn\...' literals: those relied on invalid escape sequences such as
# '\d' and '\s' and only formed a real path on Windows.  On Windows the
# resulting strings are identical to the old ones.
_DATA_DIR = os.path.join('.', 'deep_learn')
train_file = os.path.join(_DATA_DIR, 'sohu_train.txt')
test_file = os.path.join(_DATA_DIR, 'sohu_test.txt')
vocab_file = os.path.join(_DATA_DIR, 'sohu_vocab.txt')
category_file = os.path.join(_DATA_DIR, 'sohu_category.txt')
output_file = os.path.join(_DATA_DIR, 'sohu_run_text_run')
if not os.path.exists(output_file):
    # makedirs also creates the parent data directory when it is missing,
    # where os.mkdir would raise FileNotFoundError.
    os.makedirs(output_file)
词表封装api
class Vocab:
    """Word-to-id table loaded from a ``word<TAB>frequency`` file.

    Ids are assigned in file order to every word whose frequency reaches the
    threshold.  The row whose word is the empty string provides the id used
    for unknown words.  If no such row passes the threshold, that id is -1.
    """

    def __init__(self, filename, num_word_threshold):
        """Load the vocabulary.

        Args:
            filename: Path of the vocabulary file (platform default encoding).
            num_word_threshold: Minimum frequency for a word to be kept.
        """
        self._word_to_id = {}
        self._unk = -1
        self._num_word_threshold = num_word_threshold
        self._read_dict(filename)

    def _read_dict(self, filename):
        """Fill the table from ``filename``.

        Raises:
            ValueError: If a non-empty row is not ``word<TAB>integer``.
        """
        with open(filename, 'r') as f:
            for line in f:
                line = line.strip('\r\n')
                if not line:
                    # A blank (for example trailing) line is not an entry.
                    continue
                word, frequency = line.split('\t')
                if int(frequency) < self._num_word_threshold:
                    continue
                if word in self._word_to_id:
                    # Keep the first id.  Re-assigning len(dict) to a repeated
                    # word would later collide with the next new word.
                    continue
                idx = len(self._word_to_id)
                if word == '':
                    self._unk = idx
                self._word_to_id[word] = idx

    def word_to_id(self, word):
        """Return the id of ``word``, or the unknown-word id if absent."""
        return self._word_to_id.get(word, self._unk)

    @property
    def unk(self):
        """Id used for unknown words."""
        return self._unk

    def size(self):
        """Return the number of entries in the table."""
        return len(self._word_to_id)

    def sentence_to_id(self, sentence):
        """Map a whitespace-separated sentence to a list of word ids."""
        return [self.word_to_id(cur_word) for cur_word in sentence.split()]
# Smoke test: load the vocabulary and log how many entries it holds.
vocab = Vocab(vocab_file, hps.num_word_threshold)
tf.logging.info('vocab_size: {}'.format(vocab.size()))
类别的封装
class CategoryDict:
    """Label-to-id table loaded from a file with one label per line.

    Ids are assigned in file order, starting at 0.
    """

    def __init__(self, filename):
        """Load the labels from ``filename`` (platform default encoding)."""
        self._category_to_id = {}
        with open(filename, 'r') as f:
            for line in f:
                category = line.strip('\r\n')
                if not category:
                    # A blank line would otherwise become a bogus '' label.
                    continue
                if category in self._category_to_id:
                    # Keep the first id.  Re-assigning len(dict) to a repeated
                    # label would later collide with the next new label.
                    continue
                self._category_to_id[category] = len(self._category_to_id)

    def size(self):
        """Return the number of distinct labels."""
        return len(self._category_to_id)

    def category_to_id(self, category):
        """Return the id of ``category``.

        Raises:
            Exception: If ``category`` is not in the table.
        """
        if category not in self._category_to_id:
            raise Exception("{} is not in our category".format(category))
        return self._category_to_id[category]
# Smoke test: load the category list and log the id of the label in test_str.
# category_to_id raises if that label is not present in category_file.
category_vocab = CategoryDict(category_file)
test_str = '女人'
tf.logging.info('id:{}'.format(category_vocab.category_to_id(test_str)))
# api sentence_to_id的实现
class Vocab:
    """Word-to-id table loaded from a ``word<TAB>frequency`` file.

    Ids are assigned in file order to every word whose frequency reaches the
    threshold.  The row whose word is the empty string provides the id used
    for unknown words.  If no such row passes the threshold, that id is -1.
    """

    def __init__(self, filename, num_word_threshold):
        """Load the vocabulary.

        Args:
            filename: Path of the vocabulary file (platform default encoding).
            num_word_threshold: Minimum frequency for a word to be kept.
        """
        self._word_to_id = {}
        self._unk = -1
        self._num_word_threshold = num_word_threshold
        self._read_dict(filename)

    def _read_dict(self, filename):
        """Fill the table from ``filename``.

        Raises:
            ValueError: If a non-empty row is not ``word<TAB>integer``.
        """
        with open(filename, 'r') as f:
            for line in f:
                line = line.strip('\r\n')
                if not line:
                    # A blank (for example trailing) line is not an entry.
                    continue
                word, frequency = line.split('\t')
                if int(frequency) < self._num_word_threshold:
                    continue
                if word in self._word_to_id:
                    # Keep the first id.  Re-assigning len(dict) to a repeated
                    # word would later collide with the next new word.
                    continue
                idx = len(self._word_to_id)
                if word == '':
                    self._unk = idx
                self._word_to_id[word] = idx

    def word_to_id(self, word):
        """Return the id of ``word``, or the unknown-word id if absent."""
        return self._word_to_id.get(word, self._unk)

    @property
    def unk(self):
        """Id used for unknown words."""
        return self._unk

    def size(self):
        """Return the number of entries in the table."""
        return len(self._word_to_id)

    def sentence_to_id(self, sentence):
        """Map a whitespace-separated sentence to a list of word ids."""
        return [self.word_to_id(cur_word) for cur_word in sentence.split()]
category_to_id的实现和test
class CategoryDict:
    """Label-to-id table loaded from a file with one label per line.

    Ids are assigned in file order, starting at 0.
    """

    def __init__(self, filename):
        """Load the labels from ``filename`` (platform default encoding)."""
        self._category_to_id = {}
        with open(filename, 'r') as f:
            for line in f:
                category = line.strip('\r\n')
                if not category:
                    # A blank line would otherwise become a bogus '' label.
                    continue
                if category in self._category_to_id:
                    # Keep the first id.  Re-assigning len(dict) to a repeated
                    # label would later collide with the next new label.
                    continue
                self._category_to_id[category] = len(self._category_to_id)

    def size(self):
        """Return the number of distinct labels."""
        return len(self._category_to_id)

    def category_to_id(self, category):
        """Return the id of ``category``.

        The whole table is printed before raising, to help diagnose an
        unexpected label.

        Raises:
            Exception: If ``category`` is not in the table.
        """
        if category not in self._category_to_id:
            print(self._category_to_id)
            raise Exception("{} is not in our category".format(category))
        return self._category_to_id[category]
# Load the vocabulary and the category list, then log the vocabulary size and
# the id of the label held in test_str (category_to_id raises if it is absent).
vocab = Vocab(vocab_file, hps.num_word_threshold)
tf.logging.info('vocab_size: {}'.format(vocab.size()))
category_vocab = CategoryDict(category_file)
test_str = '女人'
tf.logging.info('id:{}'.format(category_vocab.category_to_id(test_str)))
数据集的 next batch 的实现
由于 train_seg_file 和 test_seg_file 写出时使用的编码不一样,所以读取时也要分别指定对应的编码
class TextDataSet:
    """In-memory dataset of (token ids, label id) samples with batching.

    Every sample is truncated or padded to ``num_timesteps`` token ids.
    Padding uses the vocabulary's unknown-word id.  The samples are shuffled
    once after loading and again each time a pass over the data ends.
    """

    def __init__(self, filename, vocab, category_vocab, num_timesteps):
        """Load ``filename`` and prepare it for batching.

        Args:
            filename: Segmented file with ``label<TAB>token token ...`` rows.
            vocab: Object providing ``sentence_to_id(str)`` and ``unk``.
            category_vocab: Object providing ``category_to_id(str)``.
            num_timesteps: Number of token ids kept per sample.
        """
        self._vocab = vocab
        self._category_vocab = category_vocab
        self._num_timesteps = num_timesteps
        # Token-id rows, one per sample.
        self._inputs = []
        # Label ids, aligned with _inputs.
        self._outputs = []
        # Start offset of the next batch.
        self._indicator = 0
        self._parse_file(filename)

    def _parse_file(self, filename):
        """Read, encode, pad and shuffle all samples of ``filename``."""
        tf.logging.info('Loading data from {}'.format(filename))
        # Files whose name contains 'train' are read with the platform
        # default encoding and files whose name contains 'test' with
        # 'utf-8-sig', as before.  Any other name now falls back to the
        # default encoding instead of failing on an unset line list.
        if 'train' not in filename and 'test' in filename:
            encoding = 'utf-8-sig'
        else:
            encoding = None
        with open(filename, 'r', encoding=encoding) as f:
            lines = f.readlines()
        for line in lines:
            line = line.strip('\r\n')
            if not line:
                continue
            label, content = line.split('\t', 1)
            id_label = self._category_vocab.category_to_id(label)
            id_words = self._vocab.sentence_to_id(content)
            id_words = id_words[0: self._num_timesteps]
            padding_num = self._num_timesteps - len(id_words)
            id_words = id_words + [self._vocab.unk] * padding_num
            # Token ids are the model inputs and the label is the target.
            # They used to be stored the other way round, which made
            # next_batch return (labels, tokens) while the training loops
            # unpack (inputs, labels).
            self._inputs.append(id_words)
            self._outputs.append(id_label)
        self._inputs = np.asarray(self._inputs, dtype=np.int32)
        self._outputs = np.asarray(self._outputs, dtype=np.int32)
        self._random_shuffle()

    def _random_shuffle(self):
        """Shuffle inputs and outputs with one shared permutation."""
        p = np.random.permutation(len(self._inputs))
        self._inputs = self._inputs[p]
        self._outputs = self._outputs[p]

    def next_batch(self, batch_size):
        """Return the next ``(token_ids, label_ids)`` batch.

        When fewer than ``batch_size`` samples remain, the data is reshuffled
        and the batch is taken from the start.

        Raises:
            Exception: If ``batch_size`` exceeds the number of samples.
        """
        end_indicator = self._indicator + batch_size
        if end_indicator > len(self._inputs):
            self._random_shuffle()
            self._indicator = 0
            end_indicator = batch_size
        if end_indicator > len(self._inputs):
            raise Exception("batch size: {} is too large".format(batch_size))
        batch_inputs = self._inputs[self._indicator: end_indicator]
        batch_output = self._outputs[self._indicator: end_indicator]
        self._indicator = end_indicator
        return batch_inputs, batch_output
# Build both datasets and print one two-sample batch from each as a sanity
# check of the loading and batching code.
train_dataset = TextDataSet(train_file, vocab, category_vocab, hps.num_timesteps)
test_dataset = TextDataSet(test_file, vocab, category_vocab, hps.num_timesteps)
print(train_dataset.next_batch(2))
print(test_dataset.next_batch(2))
def create_model(hps, vocab_size, num_classes):
    """Build the stacked-LSTM text-classification graph.

    Args:
        hps: Hyper-parameters.  Reads num_timesteps, batch_size,
            num_embedding_size, num_lstm_nodes, num_lstm_laysers,
            num_fc_nodes, clip_lstm_grads and learning_rate.
        vocab_size: Number of rows of the embedding matrix.
        num_classes: Number of logits produced by the last dense layer.

    Returns:
        A 3-tuple ``((inputs, outputs, keep_prob), (loss, accuracy),
        (train_op, global_step))`` of placeholders, metric tensors and
        training ops.
    """
    steps = hps.num_timesteps
    batch = hps.batch_size

    # Token ids, one row of `steps` ids per sample.
    inputs = tf.placeholder(tf.int32, (batch, steps))
    # One label id per sample.
    outputs = tf.placeholder(tf.int32, (batch, ))
    # Keep probability fed to the dropout layers.
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    # Counts the optimizer steps applied so far.
    global_step = tf.Variable(
        tf.zeros([], tf.int64), name='global_step', trainable=False)

    # Embedding table, initialised uniformly in [-1, 1].
    with tf.variable_scope(
            'embedding',
            initializer=tf.random_uniform_initializer(-1.0, 1.0)):
        embedding_table = tf.get_variable(
            'embedding', [vocab_size, hps.num_embedding_size], tf.float32)
        # Each id is replaced by its row of the table,
        # e.g. [1, 10, 7] -> [table[1], table[10], table[7]].
        embedded = tf.nn.embedding_lookup(embedding_table, inputs)

    # Uniform initialisation bound derived from the embedding width and the
    # size of the last LSTM layer.
    init_bound = 1.0 / math.sqrt(
        hps.num_embedding_size + hps.num_lstm_nodes[-1]) / 3.0
    with tf.variable_scope(
            'lstm_nn',
            initializer=tf.random_uniform_initializer(
                -init_bound, init_bound)):
        layers = []
        for layer_idx in range(hps.num_lstm_laysers):
            lstm_cell = tf.contrib.rnn.BasicLSTMCell(
                hps.num_lstm_nodes[layer_idx], state_is_tuple=True)
            # Dropout is applied to the output of every layer.
            layers.append(tf.contrib.rnn.DropoutWrapper(
                lstm_cell, output_keep_prob=keep_prob))
        stacked_cell = tf.contrib.rnn.MultiRNNCell(layers)
        zero_state = stacked_cell.zero_state(batch, tf.float32)
        rnn_outputs, _ = tf.nn.dynamic_rnn(
            stacked_cell, embedded, initial_state=zero_state)
        print(rnn_outputs)
        # Only the output of the final time step feeds the classifier.
        last_step = rnn_outputs[:, -1, :]

    # Dense head: hidden layer, dropout, then the logits.
    with tf.variable_scope(
            'fc', initializer=tf.uniform_unit_scaling_initializer(factor=1.0)):
        hidden = tf.layers.dense(
            last_step, hps.num_fc_nodes, activation=tf.nn.relu, name='fc1')
        hidden_dropped = tf.contrib.layers.dropout(hidden, keep_prob)
        logits = tf.layers.dense(hidden_dropped, num_classes, name='fc2')

    # Mean cross-entropy loss and batch accuracy.
    with tf.name_scope('metrics'):
        per_sample_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=outputs)
        loss = tf.reduce_mean(per_sample_loss)
        predicted = tf.arg_max(
            tf.nn.softmax(logits=logits), 1, output_type=tf.int32)
        hits = tf.equal(outputs, predicted)
        accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))

    # Adam step on gradients clipped by global norm.
    with tf.name_scope('train_op'):
        trainables = tf.trainable_variables()
        for variable in trainables:
            tf.logging.info('variable name: {}'.format(variable.name))
        clipped_grads, _ = tf.clip_by_global_norm(
            tf.gradients(loss, trainables), hps.clip_lstm_grads)
        adam = tf.train.AdamOptimizer(hps.learning_rate)
        train_op = adam.apply_gradients(
            zip(clipped_grads, trainables), global_step=global_step)

    placeholders = (inputs, outputs, keep_prob)
    metrics = (loss, accuracy)
    training = (train_op, global_step)
    return (placeholders, metrics, training)
# vocab_size and num_classes are not defined anywhere in the visible script,
# so they are derived here from the loaded vocabulary and category table.
vocab_size = vocab.size()
num_classes = len(category_vocab._category_to_id)

placeholders, metrics, others = create_model(
    hps, vocab_size, num_classes
)
inputs, outputs, keep_prod = placeholders
loss, accuracy = metrics
train_op, global_step = others

init_op = tf.global_variables_initializer()
# Dropout is active while training and disabled while evaluating.
train_keep_prob_value = 0.8
test_keep_prob_value = 1.0
# Number of test batches averaged per evaluation.
test_steps = 100
num_train_steps = 10000

with tf.Session() as sess:
    sess.run(init_op)
    for i in range(num_train_steps):
        batch_inputs, batch_labels = train_dataset.next_batch(
            hps.batch_size
        )
        loss_val, accuracy_val, _, global_step_val = sess.run(
            [loss, accuracy, train_op, global_step],
            feed_dict={
                inputs: batch_inputs,
                outputs: batch_labels,
                keep_prod: train_keep_prob_value,
            })
        if (i + 1) % 20 == 0:
            tf.logging.info("Train Step: {}, loss: {}, accuracy: {}".format(global_step_val, loss_val, accuracy_val))
        if (i + 1) % 100 == 0:
            all_test_loss_val = []
            all_test_acc_cal = []
            for _ in range(test_steps):
                test_inputs, test_labels = test_dataset.next_batch(hps.batch_size)
                # Evaluation fetches only the metrics.  Fetching train_op here
                # as well would update the weights on the test data.
                test_loss_val, test_accuarcy_val = sess.run(
                    [loss, accuracy],
                    feed_dict={
                        inputs: test_inputs,
                        outputs: test_labels,
                        keep_prod: test_keep_prob_value,
                    })
                all_test_loss_val.append(test_loss_val)
                all_test_acc_cal.append(test_accuarcy_val)
            # Report the mean over all evaluated batches, not just the last.
            test_loss = np.mean(all_test_loss_val)
            test_acc = np.mean(all_test_acc_cal)
            tf.logging.info("Test Step: {}, loss: {}, accuracy: {}".format(global_step_val, test_loss, test_acc))
使用以下参数
# num_embedding_size = 16,
num_embedding_size = 32,
# 一个句子取前50个分词
# num_timesteps = 50,
num_timesteps = 600,
# num_lstm_nodes = [32, 32],
num_lstm_nodes = [64, 64],
num_lstm_laysers = 2,
# num_fc_nodes = 32,
num_fc_nodes = 64,
batch_size = 100,
clip_lstm_grads = 1.0,
learning_rate = 0.001,
num_word_threshold = 10,
训练10K次得到:
train 集和test集大概都有98%的准确度
def create_model(hps, vocab_size, num_classes):
    """Build the 1-D convolution text-classification graph.

    Args:
        hps: Hyper-parameters.  Reads num_timesteps, batch_size,
            num_embedding_size, num_filters, num_kernel_size, num_fc_nodes
            and learning_rate.
        vocab_size: Number of rows of the embedding matrix.
        num_classes: Number of logits produced by the last dense layer.

    Returns:
        A 3-tuple ``((inputs, outputs, keep_prob), (loss, accuracy),
        (train_op, global_step))`` of placeholders, metric tensors and
        training ops.
    """
    steps = hps.num_timesteps
    batch = hps.batch_size

    # Token ids, one row of `steps` ids per sample.
    inputs = tf.placeholder(tf.int32, (batch, steps))
    # One label id per sample.
    outputs = tf.placeholder(tf.int32, (batch, ))
    # Keep probability fed to the dropout layer.
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    # Counts the optimizer steps applied so far.
    global_step = tf.Variable(
        tf.zeros([], tf.int64), name='global_step', trainable=False)

    # Embedding table, initialised uniformly in [-1, 1].
    with tf.variable_scope(
            'embedding',
            initializer=tf.random_uniform_initializer(-1.0, 1.0)):
        embedding_table = tf.get_variable(
            'embedding', [vocab_size, hps.num_embedding_size], tf.float32)
        # Each id is replaced by its row of the table,
        # e.g. [1, 10, 7] -> [table[1], table[10], table[7]].
        embedded = tf.nn.embedding_lookup(embedding_table, inputs)

    # Uniform initialisation bound derived from the embedding width and the
    # number of convolution filters.
    init_bound = 1.0 / math.sqrt(
        hps.num_embedding_size + hps.num_filters) / 3.0
    with tf.variable_scope(
            'cnn',
            initializer=tf.random_uniform_initializer(
                -init_bound, init_bound)):
        # Convolve along the time axis, then keep the maximum of every filter
        # over that axis (axis 1), leaving one value per filter per sample.
        conv_features = tf.layers.conv1d(
            embedded,
            hps.num_filters,
            hps.num_kernel_size,
            activation=tf.nn.relu,
        )
        pooled = tf.reduce_max(conv_features, axis=[1])

    # Dense head: hidden layer, dropout, then the logits.
    with tf.variable_scope(
            'fc', initializer=tf.uniform_unit_scaling_initializer(factor=1.0)):
        hidden = tf.layers.dense(
            pooled, hps.num_fc_nodes, activation=tf.nn.relu, name='fc1')
        hidden_dropped = tf.contrib.layers.dropout(hidden, keep_prob)
        logits = tf.layers.dense(hidden_dropped, num_classes, name='fc2')

    # Mean cross-entropy loss and batch accuracy.
    with tf.name_scope('metrics'):
        per_sample_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=outputs)
        loss = tf.reduce_mean(per_sample_loss)
        predicted = tf.arg_max(
            tf.nn.softmax(logits=logits), 1, output_type=tf.int32)
        hits = tf.equal(outputs, predicted)
        accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))

    # Plain Adam step, without gradient clipping.
    with tf.name_scope('train_op'):
        adam = tf.train.AdamOptimizer(hps.learning_rate)
        train_op = adam.minimize(loss, global_step=global_step)

    placeholders = (inputs, outputs, keep_prob)
    metrics = (loss, accuracy)
    training = (train_op, global_step)
    return (placeholders, metrics, training)
# vocab_size and num_classes are not defined anywhere in the visible script,
# so they are derived here from the loaded vocabulary and category table.
vocab_size = vocab.size()
num_classes = len(category_vocab._category_to_id)

placeholders, metrics, others = create_model(
    hps, vocab_size, num_classes
)
inputs, outputs, keep_prod = placeholders
loss, accuracy = metrics
train_op, global_step = others
#%%
# train:
init_op = tf.global_variables_initializer()
# Dropout is active while training and disabled while evaluating.
train_keep_prob_value = 0.8
test_keep_prob_value = 1.0
# Number of test batches averaged per evaluation.
test_steps = 100
num_train_steps = 10000
# Accuracies noted by the author (Train: 100%, Test: 95.3%) were measured
# while the evaluation loop still fetched train_op.
with tf.Session() as sess:
    sess.run(init_op)
    for i in range(num_train_steps):
        batch_inputs, batch_labels = train_dataset.next_batch(
            hps.batch_size
        )
        loss_val, accuracy_val, _, global_step_val = sess.run(
            [loss, accuracy, train_op, global_step],
            feed_dict={
                inputs: batch_inputs,
                outputs: batch_labels,
                keep_prod: train_keep_prob_value,
            })
        if global_step_val % 100 == 0:
            tf.logging.info("Train Step: {}, loss: {}, accuracy: {}".format(global_step_val, loss_val, accuracy_val))
        if global_step_val % 1000 == 0:
            all_test_loss_val = []
            all_test_acc_cal = []
            for _ in range(test_steps):
                test_inputs, test_labels = test_dataset.next_batch(hps.batch_size)
                # Evaluation fetches only the metrics.  Fetching train_op here
                # as well would update the weights on the test data and
                # advance global_step during evaluation.
                test_loss_val, test_accuarcy_val = sess.run(
                    [loss, accuracy],
                    feed_dict={
                        inputs: test_inputs,
                        outputs: test_labels,
                        keep_prod: test_keep_prob_value,
                    })
                all_test_loss_val.append(test_loss_val)
                all_test_acc_cal.append(test_accuarcy_val)
            # Report the mean over all evaluated batches, not just the last.
            test_loss = np.mean(all_test_loss_val)
            test_acc = np.mean(all_test_acc_cal)
            tf.logging.info("------Test Step: {}, loss: {}, accuracy: {}".format(global_step_val, test_loss, test_acc))
使用 CNN 会比 LSTM 收敛得快很多
参数如下:
def get_default_params():
    """Return the default hyper-parameters of the convolutional classifier.

    Returns:
        A ``tf.contrib.training.HParams`` instance carrying the values below.
    """
    defaults = {
        # Width of each word embedding vector (16 was tried earlier).
        'num_embedding_size': 32,
        # Number of token ids kept per sample (50 was tried earlier).
        'num_timesteps': 600,
        # Number of convolution filters (128 was tried earlier).
        'num_filters': 256,
        # Kernel size passed to the 1-D convolution.
        'num_kernel_size': 3,
        # Units of the hidden dense layer (64 is the noted alternative).
        'num_fc_nodes': 32,
        'batch_size': 100,
        'learning_rate': 0.001,
        # Minimum frequency a word needs to enter the vocabulary.
        'num_word_threshold': 10,
    }
    return tf.contrib.training.HParams(**defaults)
在后面使用双向 RNN(Bi-RNN)和 attention 会比单独使用 LSTM 收敛得更快。参考 tf-rnn-attention。
代码为
def create_model(hps, vocab_size, num_classes):
    """Build the bidirectional-GRU + attention text-classification graph.

    Args:
        hps: Hyper-parameters.  Reads num_timesteps, batch_size,
            num_embedding_size, num_fc_nodes, clip_lstm_grads and
            learning_rate.
        vocab_size: Vocabulary size; the embedding matrix gets
            ``vocab_size + 2`` rows.
        num_classes: Number of logits produced by the last dense layer.

    Returns:
        A 3-tuple ``((inputs, outputs, keep_prob, seq_len_ph),
        (loss, accuracy), (train_op, global_step))`` of placeholders, metric
        tensors and training ops.

    NOTE(review): ``bi_rnn``, ``GRUCell`` and ``attention`` are not imported
    in the visible part of this file (the accompanying text points to
    tf-rnn-attention); they have to be in scope when this is called.
    """
    num_timesteps = hps.num_timesteps
    batch_size = hps.batch_size
    # Token ids, one row of num_timesteps ids per sample.
    inputs = tf.placeholder(tf.int32, (batch_size, num_timesteps))
    # One label id per sample.
    outputs = tf.placeholder(tf.int32, (batch_size, ))
    # Keep probability fed to the dropout layers.
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    # Counts the optimizer steps applied so far.
    global_step = tf.Variable(
        tf.zeros([], tf.int64), name='global_step', trainable=False)
    # Per-sample sequence lengths handed to the bidirectional RNN.
    seq_len_ph = tf.placeholder(tf.int32, [None], name='seq_len_ph')

    # Embedding table, initialised uniformly in [-1, 1].
    embedding_initializer = tf.random_uniform_initializer(-1.0, 1.0)
    with tf.variable_scope(
            'embedding', initializer=embedding_initializer):
        embeddings = tf.get_variable(
            'embedding',
            [vocab_size + 2, hps.num_embedding_size],
            tf.float32)
        print('embeddings', embeddings)
        # Each id is replaced by its row of the table,
        # e.g. [1, 10, 7] -> [embeddings[1], embeddings[10], embeddings[7]].
        embed_inputs = tf.nn.embedding_lookup(embeddings, inputs)
        print('embed_inputs', embed_inputs)

    # Bidirectional RNN layer with one 64-unit GRU cell per direction.
    rnn_outputs, _ = bi_rnn(GRUCell(64), GRUCell(64),
                            inputs=embed_inputs,
                            sequence_length=seq_len_ph,
                            dtype=tf.float32)
    print('rnn_outputs', rnn_outputs)

    # Attention layer with attention size 50.
    # presumably reduces the RNN outputs to one vector per sample — verify
    # against the attention helper in use.
    with tf.name_scope('Attention_layer'):
        attention_output, alphas = attention(
            rnn_outputs, 50, return_alphas=True)
    drop = tf.nn.dropout(attention_output, keep_prob)
    print(drop.shape)

    fc_init = tf.uniform_unit_scaling_initializer(factor=1.0)
    # Dense head: hidden layer, dropout, then the logits.
    with tf.variable_scope('fc', initializer=fc_init):
        fc1 = tf.layers.dense(drop,
                              hps.num_fc_nodes,
                              activation=tf.nn.relu,
                              name='fc1')
        # tf.nn.dropout takes a keep probability, matching the placeholder.
        # The previous tf.layers.dropout(fc1, keep_prob) read the value as a
        # drop rate and, with its default training=False, applied no dropout.
        fc1_dropout = tf.nn.dropout(fc1, keep_prob)
        logits = tf.layers.dense(fc1_dropout,
                                 num_classes,
                                 name='fc2')

    # Mean cross-entropy loss and batch accuracy.
    with tf.name_scope('metrics'):
        sofmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=outputs
        )
        loss = tf.reduce_mean(sofmax_loss)
        y_pred = tf.arg_max(tf.nn.softmax(logits=logits),
                            1,
                            output_type=tf.int32)
        correct_pred = tf.equal(outputs, y_pred)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Adam step on gradients clipped by global norm.
    with tf.name_scope('train_op'):
        tvars = tf.trainable_variables()
        for var in tvars:
            tf.logging.info('variable name: {}'.format(var.name))
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(loss, tvars), hps.clip_lstm_grads
        )
        optimizer = tf.train.AdamOptimizer(hps.learning_rate)
        train_op = optimizer.apply_gradients(
            zip(grads, tvars), global_step=global_step
        )

    return ((inputs, outputs, keep_prob, seq_len_ph),
            (loss, accuracy),
            (train_op, global_step)
            )