
1. define a model

class LSTM(object):
    """Template for a TensorFlow 1.x graph-mode model.

    The constructor stores hyper-parameters and creates the input
    placeholders; ``build_graph`` adds the loss, prediction, accuracy and
    training ops.  The network body (``inference``) and the attention
    function (``attention``) are hooks that still have to be written.
    """

    def __init__(self, config):
        """Store hyper-parameters and create the input placeholders.

        Args:
            config: dict of hyper-parameters.  ``config["attribute"]`` is
                required.  ``config["learning_rate"]`` is optional and falls
                back to 0.001, the default of ``tf.train.AdamOptimizer``.
        """
        # Copy hyper-parameters onto the instance.
        self.attribute = config["attribute"]
        self.learning_rate = config.get("learning_rate", 0.001)

        # Placeholders for the input data.  They are stored on ``self``
        # because the training/validation steps feed ``model.x_input`` and
        # ``model.y_input``.
        self.x_input = tf.placeholder(tf.int32, shape=[None, None], name='x_input')
        # NOTE(review): labels are rank 1 here (one class id per example) so
        # that they match the axis-1 argmax in ``build_graph`` and the rank
        # requirement of the sparse cross-entropy loss -- confirm that this
        # is the intended task layout.
        self.y_input = tf.placeholder(tf.int32, shape=[None], name='y_input')

    def inference(self):
        """Return the logits computed from ``self.x_input``.

        This is where the network structure is defined and layers are added
        (a fully-connected layer has to be written by hand).

        Raises:
            NotImplementedError: always, until the network is filled in.
        """
        raise NotImplementedError("define the network structure in LSTM.inference()")

    def attention(self, *args, **kwargs):
        """Compute the attention function u.

        Raises:
            NotImplementedError: always, until the function is filled in.
        """
        raise NotImplementedError("define the attention function in LSTM.attention()")

    def build_graph(self):
        """Create the loss, prediction, accuracy and training ops.

        Sets ``self.loss``, ``self.prediction``, ``self.accuracy``,
        ``self.global_step``, ``self.optimizer`` and ``self.train_op``.
        """
        y_hat = self.inference()

        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=y_hat, labels=self.y_input))
        self.prediction = tf.argmax(tf.nn.softmax(y_hat), 1)
        # tf.argmax yields int64, so the labels are cast before comparing.
        self.accuracy = tf.reduce_mean(
            tf.cast(
                tf.equal(self.prediction, tf.cast(self.y_input, tf.int64)),
                tf.float32))
        self.global_step = tf.Variable(0, name="global_step", trainable=False)

        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        # compute_gradients returns the (gradient, variable) pairs that
        # apply_gradients expects.
        grads_and_vars = self.optimizer.compute_gradients(self.loss)
        self.train_op = self.optimizer.apply_gradients(
            grads_and_vars, global_step=self.global_step, name="train_step")


2. data pre-processing

def data_preparation(data_path):
    """Load and preprocess the dataset found at ``data_path``.

    Intended steps: read the data, clean it, split it into a training set
    and a development set, tokenise, and map words to ids or vectors.

    NOTE(review): the return value is not specified here; the trainer
    unpacks ``x_train, y_train, x_val, y_val`` from its data loader --
    confirm whether this function should return the same tuple.

    Raises:
        NotImplementedError: always, until the steps above are implemented.
    """
    raise NotImplementedError("data_preparation() is a template and has no implementation yet")

在该函数中,读取数据,清理数据,划分数据集(training set,development set),分词,完成words到ids、或vectors的映射。

def batch_generator(x_train, y_train, batch_size=64):
    """Yield shuffled mini-batches of ``(x, y)`` pairs covering one epoch.

    The data is reshuffled every time the generator is created.  Each
    yielded batch is a list of ``(x, y)`` tuples, so the caller recovers the
    two columns with ``zip(*batch)``.

    Args:
        x_train: indexable sequence of inputs.
        y_train: indexable sequence of labels, same length as ``x_train``.
        batch_size: maximum number of pairs per batch; the last batch may be
            smaller.

    Yields:
        list of ``(x, y)`` tuples.  Every sample appears exactly once per
        epoch and no empty batch is produced.

    Raises:
        ValueError: on the first iteration, if the two sequences differ in
            length or ``batch_size`` is not positive.
    """
    data_size = len(x_train)
    if len(y_train) != data_size:
        raise ValueError("x_train and y_train must have the same length")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Shuffle indices rather than the data so that variable-length inputs
    # never have to be packed into a single array.
    shuffle_indices = np.random.permutation(data_size)

    # Stepping by batch_size gives ceil(data_size / batch_size) batches; the
    # slice end is exclusive, so the final sample is included.
    for start_pos in range(0, data_size, batch_size):
        batch_indices = shuffle_indices[start_pos:start_pos + batch_size]
        yield [(x_train[i], y_train[i]) for i in batch_indices]

此部分是一个生成器函数(generator),以训练数据作为参数,每次产出一个training step需要的训练数据对。它很好地运用了yield的性质:trainer函数中的for循环每迭代一次,batch_generator就从上一次yield处继续执行,在其内部的for循环中得到该batch的开始和截止position,从而获得一个batch的输入。
此处的data应该是经过zip的:data = np.array(list(zip(x_train, y_train)))。返回的data_shuffled[start_pos:end_pos]还需要用zip(*batch)解压。

3. trainer function

def val_step(x_val, y_val):
    """Return the prediction accuracy of the current model on ``(x_val, y_val)``.

    Only ``model.prediction`` is run; the accuracy is computed in NumPy.
    Reads the names ``model`` and ``sess`` from module scope.

    Args:
        x_val: inputs fed to ``model.x_input``.
        y_val: labels fed to ``model.y_input`` and compared element-wise
            with the predictions.

    Returns:
        Fraction of predictions equal to the corresponding label, as a float.
    """
    feed_dict = {
        model.x_input: x_val,
        model.y_input: y_val,
    }
    prediction = sess.run(model.prediction, feed_dict)
    # The mean of the boolean matches equals correct / total and does not
    # depend on integer-vs-true division semantics.
    return float(np.mean(np.equal(prediction, y_val)))
def train_step(x_train, y_train):
    """Run one optimisation step on a single batch.

    Reads the names ``model``, ``sess`` and ``merged_summary_op`` from
    module scope.

    Args:
        x_train: batch inputs fed to ``model.x_input``.
        y_train: batch labels fed to ``model.y_input``.

    Returns:
        dict with the keys ``'train_op'``, ``'loss'``, ``'global_step'`` and
        ``'summaries'``, each mapped to the result of running that op.
    """
    feed_dict = {
        model.x_input: x_train,
        model.y_input: y_train,
    }
    to_return = {
        'train_op': model.train_op,
        'loss': model.loss,
        'global_step': model.global_step,
        'summaries': merged_summary_op,
    }
    # Session.run mirrors the structure of its fetches, so a dict of ops
    # comes back as a dict of values under the same keys.
    return sess.run(to_return, feed_dict)

def trainer():
    """Build the model and run the training loop.

    For every epoch the training data is reshuffled and split into batches.
    Each batch runs one training step and writes one summary.  Every
    ``val_interval`` global steps the model is evaluated on the validation
    set, and a checkpoint is saved whenever the accuracy is the best so far.

    Reads ``config``, ``sum_path``, ``ckpt_path`` and ``val_interval`` from
    module scope.
    """
    # train_step() and val_step() look these names up at module level, so
    # they are published as globals instead of being kept local.
    global model, sess, merged_summary_op

    # Instantiate the network from the hyper-parameters and build its graph.
    model = LSTM(config)
    model.build_graph()

    sess = tf.Session()
    summary_writer = None
    try:
        # Fetch the dataset.
        x_train, y_train, x_val, y_val = load_data()

        # Record summaries.
        loss_summary = tf.summary.scalar('loss', model.loss)
        acc_summary = tf.summary.scalar('accuracy', model.accuracy)
        merged_summary_op = tf.summary.merge([loss_summary, acc_summary])
        summary_writer = tf.summary.FileWriter(os.path.join(sum_path, 'train'), sess.graph)

        # Set the saver.
        saver = tf.train.Saver(max_to_keep=5)

        # Every variable is defined by now, so the global variables can be
        # initialised.
        sess.run(tf.global_variables_initializer())

        acc_history = []
        for _ in range(config["num_epoch"]):
            # A fresh generator reshuffles the data for each epoch.
            for batch in batch_generator(x_train, y_train):
                # Separate names keep the full training set intact for the
                # next epoch.
                x_batch, y_batch = zip(*batch)
                return_dict = train_step(x_batch, y_batch)

                step = tf.train.global_step(sess, model.global_step)
                summary_writer.add_summary(return_dict['summaries'], step)

                # Validate the current model every val_interval steps.
                if step % val_interval == 0:
                    acc_history.append(val_step(x_val, y_val))
                    # Save the best model seen so far.
                    if acc_history[-1] == max(acc_history):
                        # NOTE(review): Saver.save treats its path as a
                        # checkpoint filename prefix -- confirm that
                        # ckpt_path is a prefix and not a bare directory.
                        saver.save(sess, ckpt_path)
    finally:
        if summary_writer is not None:
            summary_writer.close()
        sess.close()

每个step(对于一个batch的训练)记录一次summary,每个epoch输出一次训练信息(loss, accuracy等),并且保存checkpoints,额外保存最佳的模型(one with minimal loss or maximal accuracy)。

4. set parameters

# hyper-parameters
config = {
    # Schematic entry: add one "name": value pair per hyper-parameter.
    "attribute": value,
}


  1. data processing parameters: path of folders, percentage of validation set to dataset;
  2. model parameters: number of filters, size of filters, embedding size, etc;
  3. training parameters: learning rate, l2 regularization lambda, number of epochs, size of batch, max_ckpt_num, val_interval, etc.

