import os
import numpy as np
import tensorflow as tf

class LSTM(object):
    def __init__(self, config):  # model attributes are passed in as a dictionary
        # assign the member variables from the config dictionary
        self.attribute = config["attribute"]
        # set placeholders for the input data
        self.x_input = tf.placeholder(tf.int32, shape=[None, None], name='x_input')
        self.y_input = tf.placeholder(tf.int32, shape=[None], name='y_input')
    def build_graph(self):
        # build the graph
        # define the network structure and add the layers (the fully-connected layer is written by hand)
        # the main computational ops are kept as members so other functions can run them
        # y_hat: the logits produced by the (elided) network layers, shape [batch_size, num_classes]
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_hat, labels=self.y_input))
        self.prediction = tf.argmax(tf.nn.softmax(y_hat), axis=1)
        correct = tf.equal(self.prediction, tf.cast(self.y_input, tf.int64))
        self.accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        # the attention score function u is also written as a member function
        # self.attention = ...
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        trainable_variables = tf.trainable_variables()
        gradients = tf.gradients(self.loss, trainable_variables)
        self.train_op = self.optimizer.apply_gradients(
            zip(gradients, trainable_variables),
            global_step=self.global_step, name="train_step")
When building the network structure, pay special attention to whether the shapes of the tensors flowing in and out match the parameters of each layer. It is best to write the input and output tensor shapes as comments before and after every operation.
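As an illustration of that shape-comment convention, here is a minimal sketch of a hand-written fully-connected output layer; the names rnn_output, hidden_size and num_classes are hypothetical placeholders, not part of the skeleton above.
# illustrative sketch: rnn_output, hidden_size and num_classes are assumed names
with tf.name_scope("output_layer"):
    # rnn_output: [batch_size, hidden_size], e.g. the final LSTM hidden state
    W = tf.get_variable("W", shape=[hidden_size, num_classes],
                        initializer=tf.contrib.layers.xavier_initializer())
    b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
    # y_hat: [batch_size, num_classes]
    y_hat = tf.nn.xw_plus_b(rnn_output, W, b, name="y_hat")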
def data_preparation(data_path):
    pass
In this function the data is read and cleaned, the dataset is split (training set, development set), the text is tokenized, and the words are mapped to ids or to vectors.
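A minimal sketch of what data_preparation could look like, assuming a tab-separated file of "label<TAB>text" lines, whitespace tokenization and a 10% development split; these choices are illustrative, not the original implementation.
def data_preparation(data_path, dev_ratio=0.1):
    # illustrative sketch: file format, tokenizer and split ratio are assumptions
    texts, labels = [], []
    with open(data_path, encoding="utf-8") as f:
        for line in f:
            # each line is assumed to be "label<TAB>text"
            label, text = line.strip().split("\t", 1)
            labels.append(int(label))
            texts.append(text.split())  # tokenize on whitespace
    # build the word -> id mapping (id 0 reserved for padding)
    vocab = {"<PAD>": 0}
    for tokens in texts:
        for w in tokens:
            vocab.setdefault(w, len(vocab))
    ids = [[vocab[w] for w in tokens] for tokens in texts]
    # split into training set and development set
    split = int(len(ids) * (1 - dev_ratio))
    return ids[:split], labels[:split], ids[split:], labels[split:], vocab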
def batch_generator(x_train, y_train, batch_size):
    # the data should be zipped so that inputs and labels are shuffled together
    data = np.array(list(zip(x_train, y_train)), dtype=object)
    data_size = len(data)
    num_batches_per_epoch = (data_size - 1) // batch_size + 1
    # shuffle the data every time the generator is created
    shuffle_indices = np.random.permutation(np.arange(data_size))
    data_shuffled = data[shuffle_indices]
    # yield one batch per training step
    for i in range(num_batches_per_epoch):
        start_pos = i * batch_size
        end_pos = min(start_pos + batch_size, data_size)
        # return a batch
        yield data_shuffled[start_pos:end_pos]
This part is a generator. It takes the training data as its argument and yields the (input, label) pairs needed for each training step. It makes good use of yield: combined with the for loop inside the trainer function, every iteration of that loop resumes the for loop inside batch_generator, which computes the start and end positions and produces one batch of inputs.
The data here should already be zipped: data = np.array(list(zip(x_train, y_train))).
The returned data_shuffled[start_pos:end_pos] still has to be unpacked with zip(*batch).
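A quick illustration of the zip / unzip round trip the generator relies on; the toy arrays below are made up purely for the example.
# toy data for illustration only
x_train = [[1, 2], [3, 4], [5, 6]]
y_train = [0, 1, 0]
data = np.array(list(zip(x_train, y_train)), dtype=object)  # pair each input with its label
batch = data[0:2]                  # what batch_generator yields for one step
x_batch, y_batch = zip(*batch)     # unzip back into inputs and labels
# x_batch == ([1, 2], [3, 4]), y_batch == (0, 1)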
def val_step(x_val, y_val):
    feed_dict = {
        model.x_input: x_val,
        model.y_input: y_val
    }
    # here we only need to run the prediction member op and compute the accuracy
    prediction = sess.run(model.prediction, feed_dict)
    acc = np.mean(np.equal(prediction, y_val))
    return acc
def train_step(x_train, y_train):
    feed_dict = {
        model.x_input: x_train,
        model.y_input: y_train
    }
    to_return = {
        'train_op': model.train_op,
        'loss': model.loss,
        'global_step': model.global_step,
        'summaries': merged_summary_op
    }
    # sess.run returns a dict whose keys are 'loss', 'global_step', etc. and
    # whose values are the results of running the corresponding ops
    return sess.run(to_return, feed_dict)
def trainer():
    # instantiate the model with the hyper-parameters and build the graph
    model = LSTM(config)
    model.build_graph()
    # once all variables have been defined, the global initializer can be run
    sess = tf.Session()
    # load the dataset
    x_train, y_train, x_val, y_val = load_data()
    # record summaries
    loss_summary = tf.summary.scalar('loss', model.loss)
    acc_summary = tf.summary.scalar('accuracy', model.accuracy)
    merged_summary_op = tf.summary.merge([loss_summary, acc_summary])
    summary_writer = tf.summary.FileWriter(os.path.join(sum_path, 'train'), sess.graph)
    # set up the Saver
    saver = tf.train.Saver(max_to_keep=5)
    # initialize the global variables
    sess.run(tf.global_variables_initializer())
    # training loop: within each epoch, iterate over the batches
    acc_history = []
    for ep in range(config["num_epoch"]):
        # get a batch generator
        batcher = batch_generator(x_train, y_train, batch_size)
        for batch in batcher:
            # unzip the batch back into inputs and labels before feeding it
            x_batch, y_batch = zip(*batch)
            return_dict = train_step(x_batch, y_batch)
            # record the summary for this step
            summary_writer.add_summary(
                return_dict["summaries"],
                return_dict["global_step"]
            )
            # evaluate the current model every val_interval steps
            if tf.train.global_step(sess, model.global_step) % val_interval == 0:
                acc_history.append(val_step(x_val, y_val))
                # save the best model
                if acc_history[-1] == max(acc_history):
                    saver.save(sess, ckpt_path)
Record a summary at every step (i.e. for every batch of training), print the training information (loss, accuracy, etc.) once per epoch, save checkpoints, and additionally keep the best model (the one with minimal loss or maximal accuracy).
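A minimal sketch of that per-epoch logging and checkpointing; it would sit at the end of the epoch loop in trainer, and the print format and the "model.ckpt" file prefix are illustrative choices rather than the original code.
# illustrative: print format and checkpoint prefix are example choices
current_step = tf.train.global_step(sess, model.global_step)
val_acc = val_step(x_val, y_val)
print("epoch {}: step {}, loss {:.4f}, val accuracy {:.4f}".format(
    ep, current_step, return_dict["loss"], val_acc))
saver.save(sess, os.path.join(ckpt_path, "model.ckpt"), global_step=current_step)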
# hyper-parameters
config = {
"attribute": value
}
How the parameters are grouped:
Parameters related to the model structure are stored in a dictionary, which makes them easy to manage; everything else can simply be defined as global variables.
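For example, the split might look like the following; the specific keys and values are made-up illustrations, not the original configuration.
# made-up example values for illustration only
config = {                      # model-structure parameters, kept in one dict
    "vocab_size": 20000,
    "embedding_size": 128,
    "hidden_size": 256,
    "num_classes": 2,
    "learning_rate": 1e-3,
    "num_epoch": 10,
}
# everything else is defined directly as global variables
batch_size = 64
val_interval = 100
ckpt_path = "./checkpoints"
sum_path = "./summaries"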