cell_forward = tf.contrib.rnn.BasicLSTMCell(unit_num)
cell_backward = tf.contrib.rnn.BasicLSTMCell(unit_num)
Change this to:
cell_forward = tf.nn.rnn_cell.GRUCell(unit_num)
cell_backward = tf.nn.rnn_cell.GRUCell(unit_num)
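The call that consumes these cells is not shown above; a minimal sketch, assuming the usual tf.nn.bidirectional_dynamic_rnn setup with hypothetical inputs and seq_lengths tensors, looks like this:

(output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
    cell_forward, cell_backward, inputs,
    sequence_length=seq_lengths, dtype=tf.float32)
# GRUCell returns a single state tensor, unlike BasicLSTMCell's LSTMStateTuple,
# so any downstream code that reads state.h needs adjusting after this swap.
rnn_output = tf.concat([output_fw, output_bw], axis=2)  # [batch, seq_len, 2 * unit_num]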
_fw_cells = []
_bw_cells = []
for _ in range(2):
    cell_forward = tf.contrib.rnn.BasicLSTMCell(unit_num)
    cell_backward = tf.contrib.rnn.BasicLSTMCell(unit_num)
    _fw_cells.append(tf.nn.rnn_cell.DropoutWrapper(cell_forward, output_keep_prob=0.6))
    _bw_cells.append(tf.nn.rnn_cell.DropoutWrapper(cell_backward, output_keep_prob=0.6))
cell_forward = tf.nn.rnn_cell.MultiRNNCell(_fw_cells)
cell_backward = tf.nn.rnn_cell.MultiRNNCell(_bw_cells)
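With output_keep_prob hard-coded to 0.6, dropout also stays on at prediction time. A minimal variant (a sketch, assuming a feedable placeholder fits this graph) routes the keep probability through tf.placeholder_with_default so it defaults to 1.0 for inference:

keep_prob = tf.placeholder_with_default(1.0, shape=[], name="keep_prob")
# inside the loop above:
_fw_cells.append(tf.nn.rnn_cell.DropoutWrapper(cell_forward, output_keep_prob=keep_prob))
_bw_cells.append(tf.nn.rnn_cell.DropoutWrapper(cell_backward, output_keep_prob=keep_prob))

Feed {keep_prob: 0.6} in the training session.run call and simply omit it when decoding.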
self.loss = tf.reduce_mean(-self.log_likelihood)
# L2 regularization
self.l2_loss = self._l2_rate * (tf.nn.l2_loss(W) + tf.nn.l2_loss(b))
self.loss = self.loss + self.l2_loss
self.train_op = tf.train.AdamOptimizer(LEARN_RATE).minimize(self.loss)
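For context, self.log_likelihood and the W, b being regularized are assumed to come from the usual projection-plus-CRF head of a BiLSTM-CRF; a minimal sketch with hypothetical names (rnn_output, num_tags, self.labels, seq_lengths):

hidden_dim = 2 * unit_num  # forward and backward outputs concatenated
W = tf.get_variable("W", shape=[hidden_dim, num_tags])
b = tf.get_variable("b", shape=[num_tags], initializer=tf.zeros_initializer())
flat = tf.reshape(rnn_output, [-1, hidden_dim])
logits = tf.reshape(tf.nn.xw_plus_b(flat, W, b),
                    [-1, tf.shape(rnn_output)[1], num_tags])
self.log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood(
    logits, self.labels, seq_lengths)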
def Attn(input):
    # Self-attention over the sequence dimension. Note: as written, the projection
    # matrices are fixed random tensors (re-sampled on every session run), not
    # trainable variables.
    w_Q = tf.random_uniform((tf.shape(input)[2], 100), 0, 1)
    w_K = tf.random_uniform((tf.shape(input)[2], 100), 0, 1)
    w_V = tf.random_uniform((tf.shape(input)[2], 100), 0, 1)
    # Project the flattened input to queries, keys and values, then restore the
    # [batch, seq_len, 100] shape.
    encoder_Q = tf.matmul(tf.reshape(input, (-1, tf.shape(input)[2])), w_Q)
    encoder_K = tf.matmul(tf.reshape(input, (-1, tf.shape(input)[2])), w_K)
    encoder_V = tf.matmul(tf.reshape(input, (-1, tf.shape(input)[2])), w_V)
    encoder_Q = tf.reshape(encoder_Q, (tf.shape(input)[0], tf.shape(input)[1], -1))
    encoder_K = tf.reshape(encoder_K, (tf.shape(input)[0], tf.shape(input)[1], -1))
    encoder_V = tf.reshape(encoder_V, (tf.shape(input)[0], tf.shape(input)[1], -1))
    # Dot-product attention scores: [batch, seq_len, seq_len]
    attention_map = tf.matmul(encoder_Q, tf.transpose(encoder_K, [0, 2, 1]))
    # attention_map = attention_map / 8  # scaled dot-product would divide by sqrt(d_k), i.e. 10 for d_k = 100
    attention_map = tf.nn.softmax(attention_map)
    weightedSumV = tf.matmul(attention_map, encoder_V)
    # Project back to the input dimension; reshape to 2-D first so both matmul
    # operands have matching rank (the same pattern used for the Q/K/V projections above).
    w_Z = tf.random_uniform((100, tf.shape(input)[2]), 0, 1)
    outputs = tf.matmul(tf.reshape(weightedSumV, (-1, 100)), w_Z)
    outputs = tf.reshape(outputs, (tf.shape(input)[0], tf.shape(input)[1], -1))
    # outputs = tf.concat((outputs, outputs), 2)
    return outputs
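A usage sketch, assuming the attention block sits between the bidirectional RNN and the projection/CRF head (rnn_output as in the sketch above):

attn_output = Attn(rnn_output)  # [batch, seq_len, 2 * unit_num], same last dimension as its input
# attn_output then replaces rnn_output as the input to the W/b projection feeding the CRF.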