提高 Bi-LSTM+CRF 准确率的几个尝试

一.将lstm改为gru。

# Baseline: one plain LSTM cell per direction (TF1 contrib API);
# `unit_num` is the hidden-state size, defined elsewhere in the model.
cell_forward = tf.contrib.rnn.BasicLSTMCell(unit_num)
cell_backward = tf.contrib.rnn.BasicLSTMCell(unit_num)

改为

# Variant 1: swap LSTM for GRU — fewer parameters and gates, often
# comparable accuracy on sequence-labeling tasks.
cell_forward = tf.nn.rnn_cell.GRUCell(unit_num)
cell_backward = tf.nn.rnn_cell.GRUCell(unit_num)

二.多层RNN结构

# Variant 2: a deeper Bi-RNN — stack two dropout-wrapped LSTM layers per
# direction and fuse each stack into a single MultiRNNCell.
_fw_cells = [
    tf.nn.rnn_cell.DropoutWrapper(
        tf.contrib.rnn.BasicLSTMCell(unit_num), output_keep_prob=0.6)
    for _ in range(2)
]
_bw_cells = [
    tf.nn.rnn_cell.DropoutWrapper(
        tf.contrib.rnn.BasicLSTMCell(unit_num), output_keep_prob=0.6)
    for _ in range(2)
]
cell_forward = tf.nn.rnn_cell.MultiRNNCell(_fw_cells)
cell_backward = tf.nn.rnn_cell.MultiRNNCell(_bw_cells)

三.损失函数添加L2正则项

# Variant 3: CRF negative log-likelihood averaged over the batch,
# plus an L2 penalty on the output projection to curb overfitting.
self.loss = tf.reduce_mean(-self.log_likelihood)
# L2 regularization; W/b are the projection weights, _l2_rate the coefficient.
self.l2_loss = self._l2_rate * (tf.nn.l2_loss(W) + tf.nn.l2_loss(b))
self.loss = self.loss + self.l2_loss
self.train_op = tf.train.AdamOptimizer(LEARN_RATE).minimize(self.loss)

四.直接用attention机制替代lstm(效果极差)

def Attn(input, num_units=100):
    """Single-head scaled dot-product self-attention.

    Args:
        input: tensor of shape (batch, time, dim); the last dimension must
            be statically known so trainable weights can be created.
        num_units: projection size d_k for Q/K/V (default 100, matching the
            original hard-coded value).

    Returns:
        Tensor of shape (batch, time, dim) — same shape as `input`.

    NOTE(review): the original built w_Q/w_K/w_V/w_Z with tf.random_uniform,
    which yields *constant* tensors that are re-sampled on every session run
    and are never trained — the most likely reason the attention-only model
    performed so poorly. They are replaced with trainable variables.
    """
    # Static feature dimension (dynamic tf.shape cannot size a variable).
    dim = input.get_shape().as_list()[-1]

    with tf.variable_scope("self_attn", reuse=tf.AUTO_REUSE):
        w_Q = tf.get_variable("w_Q", shape=(dim, num_units))
        w_K = tf.get_variable("w_K", shape=(dim, num_units))
        w_V = tf.get_variable("w_V", shape=(dim, num_units))
        w_Z = tf.get_variable("w_Z", shape=(num_units, dim))

    # Project via a flat (batch*time, dim) matmul, then restore rank 3.
    flat = tf.reshape(input, (-1, dim))
    shape3 = (tf.shape(input)[0], tf.shape(input)[1], -1)
    encoder_Q = tf.reshape(tf.matmul(flat, w_Q), shape3)
    encoder_K = tf.reshape(tf.matmul(flat, w_K), shape3)
    encoder_V = tf.reshape(tf.matmul(flat, w_V), shape3)

    # Scale logits by sqrt(d_k) so softmax gradients do not vanish
    # (the original's commented-out "/8" was this for d_k=64).
    attention_map = tf.matmul(encoder_Q, tf.transpose(encoder_K, [0, 2, 1]))
    attention_map = attention_map / (float(num_units) ** 0.5)
    attention_map = tf.nn.softmax(attention_map)  # softmax over keys (last axis)

    weightedSumV = tf.matmul(attention_map, encoder_V)  # (batch, time, num_units)

    # Project back to the input dimension; flatten first because a
    # 3-D x 2-D tf.matmul is not supported in older TF1 releases.
    outputs = tf.reshape(
        tf.matmul(tf.reshape(weightedSumV, (-1, num_units)), w_Z),
        tf.shape(input))
    return outputs

五.提高学习率(在训练集大幅度提升效果,在测试集有过拟合现象)

 

 

 

 

你可能感兴趣的:(自然语言处理)