Common NLP classification models (Part 3): BERT feature extraction + fully connected tf.layers.dense()

Extracting sentence-vector features with BERT:

import numpy as np
from bert_serving.client import BertClient

    # `message` is one batch of messages
    def _get_message_text(self, message):
        all_tokens = []
        # msg is a single sentence
        for msg in message:
            msg_tokens = []
            for t in msg.get("tokens"):
                text = self._replace_number_blank(t.text)
                if text != '':
                    # msg_tokens collects all tokens of one sentence,
                    # e.g. [你, 能, 不能, 查下, 余额]
                    msg_tokens.append(text)
            # Join the tokens into one string, then split it back into
            # single characters (character-level input for Chinese BERT)
            a = ''.join(msg_tokens)

            # all_tokens holds the character lists for the whole batch
            all_tokens.append(list(a))
        # encode() must be called on a BertClient instance, which talks to
        # a running bert-serving-server
        bc = BertClient()
        bert_embedding = bc.encode(all_tokens, is_tokenized=True)
        return np.squeeze(bert_embedding)
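
For the client above to work, a bert-serving-server instance must already be running and reachable. Below is a minimal sketch of how the service and client fit together; the model directory, worker count, and example sentences are placeholders for illustration, not taken from the original code:

# Start the service once, outside the training process, e.g.:
#   bert-serving-start -model_dir /path/to/chinese_L-12_H-768_A-12 -num_worker=1
from bert_serving.client import BertClient

bc = BertClient()  # connects to localhost:5555 by default

# With is_tokenized=True the input is a list of token lists;
# here each "token" is a single Chinese character
batch = [list("你能不能查下余额"), list("帮我转账")]
vectors = bc.encode(batch, is_tokenized=True)
print(vectors.shape)  # (2, 768) with the server's default mean pooling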

Fully connected layer structure:

import tensorflow as tf

def conv_net(x, n_classes, num_layers, layer_size, C2, dropout, is_training):
    # Despite the name, this is simply a stack of fully connected layers
    # Define a scope for reusing the variables
    # layer_size == [1024,256]
    # num_layers == 2
    with tf.variable_scope('ConvNet'):
        # Flatten the data to a 1-D vector for the fully connected layer
        x = tf.contrib.layers.flatten(x)
        reg = tf.contrib.layers.l2_regularizer(C2)
        name = 'dense'
        for i in range(num_layers):
            x = tf.layers.dense(inputs=x,
                                units=layer_size[i],
                                activation=tf.nn.relu,
                                kernel_regularizer=reg,
                                name='hidden_layer_{}_{}'.format(name, i))
            x = tf.layers.dropout(x, rate=dropout, training=is_training)

        out = tf.layers.dense(inputs=x,
                              units=n_classes,
                              kernel_regularizer=reg,
                              name='dense_layer_{}'.format(name))
    return out
    
logits_train = conv_net(self.a_in, num_classes, self.num_hidden_layers, self.hidden_layer_size, self.C2, self.drop_out, is_training=True)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits_train, labels=self.b_in)
# get_regularization_loss() returns the total regularization loss collected from the kernel regularizers above.
loss = tf.reduce_mean(cross_entropy) + tf.losses.get_regularization_loss()
self.y_predict = tf.nn.softmax(logits_train)
train_op = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(loss)
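
The training snippet above assumes that self.a_in and self.b_in are placeholders fed with the BERT sentence vectors and one-hot labels. The following is a rough, self-contained sketch of how the pieces could be wired together and run; the placeholder shapes, class count, hyperparameters, and dummy data are assumptions for illustration only:

import numpy as np
import tensorflow as tf

num_classes = 10                                                    # assumed number of classes
a_in = tf.placeholder(tf.float32, [None, 768], name='features')    # BERT sentence vectors
b_in = tf.placeholder(tf.float32, [None, num_classes], name='labels')

logits = conv_net(a_in, num_classes, num_layers=2, layer_size=[1024, 256],
                  C2=0.002, dropout=0.5, is_training=True)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=b_in)
loss = tf.reduce_mean(cross_entropy) + tf.losses.get_regularization_loss()
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)

# Dummy data just to make the sketch runnable
X = np.random.randn(32, 768).astype(np.float32)
Y = np.eye(num_classes)[np.random.randint(0, num_classes, 32)].astype(np.float32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(10):
        _, l = sess.run([train_op, loss], feed_dict={a_in: X, b_in: Y})
        print('epoch', epoch, 'loss', l)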
 
