The project comes from: https://github.com/NLPxiaoxu/Easy_Lstm_Cnn
It is a text classification project using an LSTM; many thanks to the project's contributor.
With the experience from earlier projects, serializing this model is fairly straightforward. The main thing to watch is the GPU: once a GPU is hosting a TensorFlow Serving model, do not run another TensorFlow job on that same GPU, or it will raise an error.
I reworked the original project, mainly by moving the parameters into a config file.
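A practical safeguard, used by every script below, is to pin the process to a different GPU via CUDA_VISIBLE_DEVICES before TensorFlow initializes. A minimal sketch, assuming GPU 0 is occupied by TensorFlow Serving so everything else runs on GPU 1:

import os

# Hide the GPU that TensorFlow Serving occupies; this must be set
# before TensorFlow initializes CUDA (i.e. before any session is created).
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

import tensorflow as tf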
import tensorflow as tf


class Lstm_CNN(object):
    def __init__(self, config):
        self.config = config
        self.seq_length = config['seq_length']
        self.num_classes = config['num_classes']
        self.vocab_size = config['vocab_size']
        self.embedding_dim = config['embedding_dim']
        self.pre_training = config['pre_training']
        self.hidden_dim = config['hidden_dim']
        self.filters_size = config['filters_size']
        self.num_filters = config['num_filters']
        self.keep_prob = config['keep_prob']
        self.learning_rate = config['learning_rate']
        self.lr_decay = config['lr_decay']
        self.clip = config['clip']
        self.num_epochs = config['num_epochs']
        self.batch_size = config['batch_size']

        self.input_x = tf.placeholder(tf.int32, shape=[None, self.seq_length], name='input_x')
        self.input_y = tf.placeholder(tf.float32, shape=[None, self.num_classes], name='input_y')
        self.length = tf.placeholder(tf.int32, shape=[None], name='rnn_length')
        self.keep_pro = tf.placeholder(tf.float32, name='dropout')
        self.global_step = tf.Variable(0, trainable=False, name='global_step')

        self.lstm_cnn()

    def lstm_cnn(self):
        with tf.device('/device:GPU:0'), tf.name_scope('embedding'):
            # The embedding matrix is initialized from pretrained word vectors.
            self.embedding = tf.get_variable("embeddings", shape=[self.vocab_size, self.embedding_dim],
                                             initializer=tf.constant_initializer(self.pre_training))
            embedding_input = tf.nn.embedding_lookup(self.embedding, self.input_x)

        with tf.name_scope('LSTM'):
            cell = tf.nn.rnn_cell.LSTMCell(self.hidden_dim, state_is_tuple=True)
            Cell = tf.contrib.rnn.DropoutWrapper(cell, self.keep_pro)
            output, _ = tf.nn.dynamic_rnn(cell=Cell, inputs=embedding_input,
                                          sequence_length=self.length, dtype=tf.float32)

        with tf.name_scope('CNN'):
            outputs = tf.expand_dims(output, -1)  # [batch_size, seq_length, hidden_dim, 1]
            pooled_outputs = []
            for i, filter_size in enumerate(self.filters_size):
                filter_shape = [filter_size, self.hidden_dim, 1, self.num_filters]
                w = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name='w')
                b = tf.Variable(tf.constant(0.1, shape=[self.num_filters]), name='b')
                conv = tf.nn.conv2d(outputs, w, strides=[1, 1, 1, 1], padding='VALID', name='conv')
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')
                # Max-pool over the full (seq_length - filter_size + 1) output of each filter.
                pooled = tf.nn.max_pool(h, ksize=[1, self.seq_length - filter_size + 1, 1, 1],
                                        strides=[1, 1, 1, 1], padding='VALID', name='pool')
                pooled_outputs.append(pooled)
            num_filters_total = len(self.filters_size) * self.num_filters
            output_ = tf.concat(pooled_outputs, 3)
            self.output = tf.reshape(output_, shape=[-1, num_filters_total])

        with tf.name_scope('output'):
            out_final = tf.nn.dropout(self.output, keep_prob=self.keep_pro)
            o_w = tf.Variable(tf.truncated_normal([num_filters_total, self.num_classes], stddev=0.1), name='o_w')
            o_b = tf.Variable(tf.constant(0.1, shape=[self.num_classes]), name='o_b')
            self.logits = tf.matmul(out_final, o_w) + o_b
            self.predict = tf.argmax(tf.nn.softmax(self.logits), 1, name='score')

        with tf.name_scope('loss'):
            cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(cross_entropy)

        with tf.name_scope('optimizer'):
            # Alternative no.1: decayed learning rate, learning_rate = lr * 0.9 ** (global_step / 10);
            # staircase=True means the rate is updated once every decay_steps.
            # learning_rate = tf.train.exponential_decay(self.config.lr, global_step=self.global_step,
            #                                            decay_steps=10, decay_rate=self.config.lr_decay, staircase=True)
            # optimizer = tf.train.AdamOptimizer(learning_rate)
            # self.optimizer = optimizer.minimize(self.loss, global_step=self.global_step)  # global_step auto-increments
            # no.2: Adam with global-norm gradient clipping
            optimizer = tf.train.AdamOptimizer(self.learning_rate)
            # Compute the gradients, yielding (gradient, variable) pairs.
            gradients, variables = zip(*optimizer.compute_gradients(self.loss))
            # Compare the global L2 norm of the gradients with self.clip; if the norm
            # is larger, every gradient is rescaled by clip / global_norm.
            gradients, _ = tf.clip_by_global_norm(gradients, self.clip)
            self.optimizer = optimizer.apply_gradients(zip(gradients, variables),
                                                       global_step=self.global_step)  # global_step auto-increments

        with tf.name_scope('accuracy'):
            correct = tf.equal(self.predict, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')

    def feed_data(self, x_batch, real_seq_len, keep_pro):
        feed_dict = {self.input_x: x_batch,
                     self.length: real_seq_len,
                     self.keep_pro: keep_pro}
        return feed_dict
It is easy to see from the code that the model defines five inputs at construction time: input_x, input_y, length, keep_pro, and global_step. Of these, global_step is not needed at prediction time, and input_y is only used for validation during training, so it is not needed either. That leaves three serving inputs: input_x, length, and keep_pro.
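At prediction time the dropout placeholder should be fed 1.0 so that no units are dropped. As a minimal sketch, a feed dict for the restored graph could be built like this (the ':0' suffixes follow from the placeholder names defined above; x_batch and real_seq_len are assumed to be prepared by data_processing, as in the scripts below):

def inference_feed(x_batch, real_seq_len):
    # keep_prob = 1.0 disables dropout for prediction.
    return {'input_x:0': x_batch,
            'rnn_length:0': real_seq_len,
            'dropout:0': 1.0}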
One last point to note is the embedding layer:
with tf.device('/device:GPU:0'), tf.name_scope('embedding'):
    self.embedding = tf.get_variable("embeddings", shape=[self.vocab_size, self.embedding_dim],
                                     initializer=tf.constant_initializer(self.pre_training))
    embedding_input = tf.nn.embedding_lookup(self.embedding, self.input_x)
In this snippet the embedding layer has to be initialized, and its initial values are read from a vector_word_npz file.
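For reference, here is a minimal sketch of what get_word2vec presumably does, assuming the npz file stores the pretrained embedding matrix under a key such as 'embeddings' (the actual key depends on how the file was written):

import numpy as np

def get_word2vec_sketch(filename):
    # Load the pretrained word-vector matrix saved with np.savez;
    # 'embeddings' is an assumed key name.
    with np.load(filename) as data:
        return data['embeddings']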
The model-export code is therefore as follows:
# encoding=utf-8
import os

import tensorflow as tf
from tensorflow.python import pywrap_tensorflow

from Lstm_Cnn import Lstm_CNN
from data_processing import get_wordid, get_word2vec, load_config

input_model_path = '/data/guoyin/LstmNewsClassTFServing/checkpoints/Lstm_CNN/best_validation-19870'
trans_model_path = '/data/guoyin/LstmNewsClassTFServing/trans_model/1/'


def export_model():
    config_path = '/data/guoyin/LstmNewsClassTFServing/config_file'
    config = load_config(config_path)
    wordId = get_wordid(config['vocab_filename'])
    config['vocab_size'] = len(wordId)
    config['pre_training'] = get_word2vec(config['vector_word_npz'])

    # Initialize the model and restore the trained weights from the checkpoint.
    model = Lstm_CNN(config)
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess=session, save_path=input_model_path)

    # Create a builder pointed at the export path.
    builder = tf.saved_model.builder.SavedModelBuilder(trans_model_path)

    # Bind the input and output tensors to names.
    inputs = {
        'input_x': tf.saved_model.utils.build_tensor_info(model.input_x),
        'rnn_length': tf.saved_model.utils.build_tensor_info(model.length),
        'dropout': tf.saved_model.utils.build_tensor_info(model.keep_pro)
    }
    outputs = {
        'predict': tf.saved_model.utils.build_tensor_info(model.predict)
    }

    # Define the signature.
    class_signature_def = tf.saved_model.signature_def_utils.build_signature_def(
        inputs=inputs,
        outputs=outputs,
        method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
    )

    # Add the graph and variables to the builder.
    builder.add_meta_graph_and_variables(
        session,
        [tf.saved_model.tag_constants.SERVING],
        signature_def_map={
            'class_def': class_signature_def
        }
    )
    builder.save()


# Inspect the structure of the original checkpoint.
def get_origin_structure():
    reader = pywrap_tensorflow.NewCheckpointReader(input_model_path)
    var_to_shape_map = reader.get_variable_to_shape_map()
    # Print the tensor names.
    for key in var_to_shape_map:
        print('tensor_name', key)


# Inspect the structure of the exported model.
def get_export_model_structure():
    sess = tf.Session()
    meta_graph_def = tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], trans_model_path)
    signature = meta_graph_def.signature_def
    print(signature['class_def'])


if __name__ == "__main__":
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    # Run export_model() once, then inspect the result:
    # get_origin_structure()
    # export_model()
    get_export_model_structure()
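Before deploying, the exported SavedModel can also be inspected from the shell with the saved_model_cli tool that ships with TensorFlow; it should list the class_def signature with its three inputs and the predict output:

saved_model_cli show --dir /data/guoyin/LstmNewsClassTFServing/trans_model/1 --all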
2. Testing the exported model locally
# encoding=utf-8
import os

import tensorflow as tf

from data_processing import read_category, get_wordid, process, seq_length, load_config


def predict(feed_dict, signature):
    # Resolve the tensor names recorded in the class_def signature
    # (uses the module-level sess created below).
    inputs_tensor_name = signature['class_def'].inputs['input_x'].name
    rnn_length_tensor_name = signature['class_def'].inputs['rnn_length'].name
    dropout_tensor_name = signature['class_def'].inputs['dropout'].name
    predict_tensor_name = signature['class_def'].outputs['predict'].name

    inputs = sess.graph.get_tensor_by_name(inputs_tensor_name)
    rnn_length = sess.graph.get_tensor_by_name(rnn_length_tensor_name)
    dropout = sess.graph.get_tensor_by_name(dropout_tensor_name)
    predict_op = sess.graph.get_tensor_by_name(predict_tensor_name)

    prediction = sess.run(predict_op, feed_dict={
        inputs: feed_dict['input_x'],
        rnn_length: feed_dict['rnn_length'],
        dropout: feed_dict['keep_pro']
    })
    return prediction


def feed_data(x_batch, real_seq_len, keep_pro):
    feed_dict = {'input_x': x_batch,
                 'rnn_length': real_seq_len,
                 'keep_pro': keep_pro}
    return feed_dict


if __name__ == "__main__":
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    # Sample news text, kept in Chinese because the model was trained on Chinese news.
    content = '本报讯 夏日温度虽然不断攀升,但廿一农旅综合体项目的施工现场,却不乏建筑工人们忙碌的身影。据悉,该项目位于妙西镇西塞山省级旅游度假区内,项目规划范围136亩,总建设用地面积5.89亩,属于浙江省坡地村镇试点项目。“目前一期已经基本完成,二期已完成80%,整个项目已形成主题风格,具备入住条件,8月底前完成项目整个建设,10月份项目开始整体试运营测试。”该项目负责人徐栋告诉记者。'
    config_path = '/data/guoyin/LstmNewsClassTFServing/config_file'
    config = load_config(config_path)
    wordId = get_wordid(config['vocab_filename'])
    categories, cat_to_id = read_category()

    export_dir = '/data/guoyin/LstmNewsClassTFServing/trans_model/1/'
    sess = tf.Session()

    # Build the input data; keep_prob is fed as 1.0 for prediction.
    val_x = process(content=content, word_to_id=wordId, cat_to_id=cat_to_id, max_length=config['seq_length'])
    real_seq_len = seq_length(val_x)
    feed_dict = feed_data(val_x, real_seq_len, 1.0)

    # Load the exported SavedModel and run the prediction.
    meta_graph_def = tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], export_dir)
    signature = meta_graph_def.signature_def
    predict_label = predict(feed_dict=feed_dict, signature=signature)

    # The 24 GB/T national-standard news categories.
    category = {'01': '政治', '02': '法律、司法', '03': '对外关系、国际关系', '04': '军事', '05': '社会、劳动', '06': '灾难、事故', '11': '经济',
                '12': '财政、金融', '13': '基本建设、建筑业、房地产', '14': '农业、农村', '15': '矿业、工业', '16': '能源、水务、水利', '17': '电子信息产业',
                '18': '交通、运输、邮政、物流', '19': '商业、外贸、海关', '21': '服务业、旅游业', '22': '环境、气象', '31': '教育', '33': '科学技术',
                '35': '文化、休闲、娱乐', '36': '文学、艺术', '37': '传媒', '38': '医药、卫生', '39': '体育'}

    # Map the predicted label id back to its category name.
    result = []
    for key, value in cat_to_id.items():
        if value == predict_label[0]:
            for key_cat, value_cat in category.items():
                if key_cat == key:
                    result.append({"id": key_cat, "name": value_cat})
    print(result)
The script prints the expected classification result.
3. Deploying the model with TensorFlow Serving and providing a service
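The client below assumes a model server is already running. As a sketch, using the gRPC port and model name from this post, tensorflow_model_server could be started like this (--model_base_path points at the directory containing the versioned 1/ folder):

tensorflow_model_server --port=8512 --model_name=classmodel \
    --model_base_path=/data/guoyin/LstmNewsClassTFServing/trans_model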
# encoding=utf-8
import time

import grpc
import numpy as np
from tensorflow.contrib.util import make_tensor_proto
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc

from data_processing import read_category, get_wordid, process, seq_length, load_config

# Connect to the model server and address the exported signature.
server = '192.168.1.99:8512'
channel = grpc.insecure_channel(server)
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
request = predict_pb2.PredictRequest()
request.model_spec.name = 'classmodel'
request.model_spec.signature_name = 'class_def'

# Sample news text, kept in Chinese because the model was trained on Chinese news.
content = '本报讯 夏日温度虽然不断攀升,但廿一农旅综合体项目的施工现场,却不乏建筑工人们忙碌的身影。据悉,该项目位于妙西镇西塞山省级旅游度假区内,项目规划范围136亩,总建设用地面积5.89亩,属于浙江省坡地村镇试点项目。“目前一期已经基本完成,二期已完成80%,整个项目已形成主题风格,具备入住条件,8月底前完成项目整个建设,10月份项目开始整体试运营测试。”该项目负责人徐栋告诉记者。'
config_path = '/data/guoyin/LstmNewsClassTFServing/config_file'
config = load_config(config_path)
wordId = get_wordid(config['vocab_filename'])
categories, cat_to_id = read_category()

start = time.perf_counter()
# Build the input data; dropout is fed as 1.0 for prediction.
val_x = process(content=content, word_to_id=wordId, cat_to_id=cat_to_id, max_length=config['seq_length'])
real_seq_len = seq_length(val_x)
request.inputs['input_x'].CopyFrom(make_tensor_proto(val_x, dtype=np.int32))
request.inputs['rnn_length'].CopyFrom(make_tensor_proto(real_seq_len, dtype=np.int32))
request.inputs['dropout'].CopyFrom(make_tensor_proto(np.float32(1.0)))
response = stub.Predict(request, 30.0)  # 30-second timeout
end = time.perf_counter()
print('prediction took %.3fs' % (end - start))

predict_label = response.outputs['predict'].int64_val

# The 24 GB/T national-standard news categories.
category = {'01': '政治', '02': '法律、司法', '03': '对外关系、国际关系', '04': '军事', '05': '社会、劳动', '06': '灾难、事故', '11': '经济',
            '12': '财政、金融', '13': '基本建设、建筑业、房地产', '14': '农业、农村', '15': '矿业、工业', '16': '能源、水务、水利', '17': '电子信息产业',
            '18': '交通、运输、邮政、物流', '19': '商业、外贸、海关', '21': '服务业、旅游业', '22': '环境、气象', '31': '教育', '33': '科学技术',
            '35': '文化、休闲、娱乐', '36': '文学、艺术', '37': '传媒', '38': '医药、卫生', '39': '体育'}

# Map the predicted label id back to its category name.
result = []
for key, value in cat_to_id.items():
    if value == predict_label[0]:
        for key_cat, value_cat in category.items():
            if key_cat == key:
                result.append({"id": key_cat, "name": value_cat})
print(result)
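For completeness, the same prediction can also be made over TensorFlow Serving's REST API instead of gRPC, provided the server was started with --rest_api_port as well (8501 is assumed below; val_x and real_seq_len are built exactly as above). A minimal sketch using the requests library:

import numpy as np
import requests

def predict_rest(val_x, real_seq_len):
    # Assumes tensorflow_model_server was started with --rest_api_port=8501.
    url = 'http://192.168.1.99:8501/v1/models/classmodel:predict'
    payload = {
        'signature_name': 'class_def',
        'inputs': {
            'input_x': np.asarray(val_x).tolist(),            # padded word-id sequences
            'rnn_length': np.asarray(real_seq_len).tolist(),  # true (unpadded) lengths
            'dropout': 1.0,
        },
    }
    # The predicted class ids come back under the 'outputs' key.
    return requests.post(url, json=payload).json()['outputs']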