最近发现模型在更新的一瞬间容易产生超时的问题,于是了解了一下:tf-serving 中有一个 warmup 机制,主要是在模型启动时加载 ${model}/${version}/assets.extra/tf_serving_warmup_requests 文件,以达到热启动的目的,使得模型更新时不易产生超时的问题。
首先根据自己模型的输入字段编写代码,生成 tf_serving_warmup_requests 文件;然后在导出模型时,将 warmup 文件一起导出。
以下是我的 warmup 文件生成代码:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author: supeihuang
# Time: 2019/9/5 9:33
import numpy
import tensorflow as tf
from tensorflow_serving.apis import model_pb2
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_log_pb2
from tensorflow_serving.apis import prediction_service_pb2
from grpc.beta import implementations
import grpc
import tensorflow as tf
from tensorflow_serving.apis import prediction_service_pb2_grpc
import datetime
# Command-line flags: address of the TF-Serving gRPC endpoint and the name of
# the served model the requests are sent to.
tf.app.flags.DEFINE_string(
    'server',
    '47.93.217.40:31220',
    'PredictionService host:port',
)
tf.app.flags.DEFINE_string(
    'model',
    'din_pregrant_posts_3',
    'Model name.',
)

# Parsed flag values, read as FLAGS.server / FLAGS.model.
FLAGS = tf.app.flags.FLAGS
# Ordered column names of one raw sample row (76 entries in total). The value
# at position i of a parsed row belongs to _CSV_COLUMNS[i].
_CSV_COLUMNS = [
    # indices 0-8 (9 columns)
    "labels", "userId", "itemId",
    "userEntrance", "userRequestTime", "userRequestWeek",
    "userOs", "userApn", "userUa",
    # indices 9-18 (10 columns)
    "userMode", "userProvince", "userCity", "userCityLevel", "userMarr",
    "userAge", "userGestat_week", "userAgeRange", "userBage", "userAppV",
    # indices 19-27 (9 columns)
    "userCliN_Inc", "userShoN_Inc", "userBotActCt",
    "userTotalTime", "userView2BottomTimes", "userEffTimes",
    "userFirstRequest", "userAppLTag", "userHisL",
    # indices 28-37 (10 columns)
    "itemAlgSource", "itemTexL", "itemKwN", "itemTitL", "itemTwN",
    "itemImgN", "itemSour", "itemCreT", "itemCliN_Inc", "itemShoN_Inc",
    # indices 38-48 (11 columns)
    "itemRevi", "itemColN", "itemShare", "itemVreN", "itemLireN", "itemLike",
    "itemEffUsers", "itemView2BottomTimes", "itemTotalTime", "itemBotSum",
    "itemMt",
    # indices 49-57 (9 columns)
    "itemContentH", "itemCtr", "itemAvsT",
    "itemFiR", "itemTimeScore", "itemBotSumCliR",
    "itemSexW", "itemSuperstiW", "itemLowTitleW",
    # indices 58-65 (8 columns)
    "itemTtP", "itemKtW", "itemKtW2", "itemTag1",
    "itemTag2", "itemTag3", "itemKs1", "itemKs2",
    # indices 66-75 (10 columns); userItemHistory is optional
    "userItemHistory", "userKeywordHistory", "userKeyword2History",
    "userTag1History", "userTag2History", "userTag3History",
    "userKs1History", "userKs2History", "userKtW", "userTtP",
]

# Names of the history columns that need splitting (8 entries).
_COLUMNS_NEED_SPILIT = [
    "userItemHistory",
    "userKeywordHistory",
    "userKeyword2History",
    "userTag1History",
    "userTag2History",
    "userTag3History",
    "userKs1History",
    "userKs2History",
]
# Columns whose raw value is a stringified sequence that is parsed into a list.
_SEQUENCE_COLUMNS = (
    'userKeywordHistory', 'userKeyword2History', 'userTag1History',
    'userTag2History', 'userTag3History', 'userKs1History', 'userKs2History',
    "userTtP", "userKtW",
)

# Scalar model inputs sent as int32 tensors of shape [num].
_INT32_SCALAR_FEATURES = (
    "userEntrance", "userRequestTime", "userRequestWeek", "userOs", "userApn",
    "userUa", "userMode", "userProvince", "userCity", "userCityLevel",
    "userMarr", "userAge", "userGestat_week", "userAgeRange", "userBage",
    "userAppV", "userCliN_Inc", "userShoN_Inc", "userBotActCt",
    "userTotalTime", "userView2BottomTimes", "userEffTimes",
    "userFirstRequest", "userAppLTag",
    "itemAlgSource", "itemTexL", "itemKwN", "itemTitL", "itemTwN", "itemImgN",
    "itemSour", "itemCreT", "itemCliN_Inc", "itemShoN_Inc", "itemRevi",
    "itemColN", "itemShare", "itemVreN", "itemLireN", "itemLike",
    "itemEffUsers", "itemView2BottomTimes", "itemTotalTime", "itemBotSum",
    "itemMt",
)

# Scalar model inputs sent as double tensors of shape [num].
_DOUBLE_SCALAR_FEATURES = (
    "itemContentH", "itemCtr", "itemAvsT", "itemFiR", "itemTimeScore",
    "itemBotSumCliR", "itemSexW", "itemSuperstiW", "itemLowTitleW",
)

# Further scalar int32 inputs, kept separate only to preserve the original
# insertion order of the request's input map.
_INT32_ITEM_KEY_FEATURES = (
    "itemKtW", "itemKtW2", "itemTag1", "itemTag2", "itemTag3",
    "itemKs1", "itemKs2",
)

# Sequence inputs sent as int32 tensors of shape [num, userHisL].
_HISTORY_FEATURES = (
    "userKeywordHistory", "userKeyword2History", "userTag1History",
    "userTag2History", "userTag3History", "userKs1History", "userKs2History",
)


def _parse_sample_line(text_line):
    """Turn one line of the sample file into a {column name: [value]} dict."""
    # NOTE(security): eval() executes arbitrary code. This is only acceptable
    # because the sample file is a trusted local dump; never point this script
    # at untrusted data (consider ast.literal_eval if every line is a literal).
    values = eval(text_line)
    # Indexing (rather than zip) keeps the IndexError on rows that are too short.
    row = {name: [values[j]] for j, name in enumerate(_CSV_COLUMNS)}
    print("b:", row)
    for column in _SEQUENCE_COLUMNS:
        print("item:", column)
        raw = row[column]
        print("tmp:", raw)
        # Compare by value: `is not ''` tests object identity, which is not a
        # reliable string comparison (and is a SyntaxWarning on Python 3.8+).
        if raw[0] != '':
            row[column] = [list(eval(raw[0]))]
        else:
            # NOTE(review): an empty field is stored as '' (not a list), as in
            # the original script; confirm what the model expects here.
            row[column] = raw[0]
    return row


def _load_sample_rows(path, limit):
    """Parse at most `limit` lines of `path` into a list of row dicts."""
    rows = []
    with open(path, 'r', encoding='UTF-8') as sample_file:
        for text_line in sample_file:
            if len(rows) >= limit:
                break
            rows.append(_parse_sample_line(text_line))
    return rows


def _build_warmup_request(row, model_name, num):
    """Build a PredictRequest for `model_name` from one parsed sample row."""
    history_len = row['userHisL'][0]
    inputs = {}
    for name in _INT32_SCALAR_FEATURES:
        inputs[name] = tf.make_tensor_proto(row[name] * num, shape=[num], dtype=tf.int32)
    for name in _DOUBLE_SCALAR_FEATURES:
        inputs[name] = tf.make_tensor_proto(row[name] * num, shape=[num], dtype=tf.double)
    for name in _INT32_ITEM_KEY_FEATURES:
        inputs[name] = tf.make_tensor_proto(row[name] * num, shape=[num], dtype=tf.int32)
    for name in _HISTORY_FEATURES:
        inputs[name] = tf.make_tensor_proto(
            row[name] * num, shape=[num, history_len], dtype=tf.int32)
    inputs["userHisL"] = tf.make_tensor_proto(row['userHisL'] * num, shape=[num], dtype=tf.int32)
    return predict_pb2.PredictRequest(
        model_spec=model_pb2.ModelSpec(name=model_name, signature_name="serving_default"),
        inputs=inputs,
    )


def main():
    """Write the TF-Serving warmup file and time the served model.

    Reads up to 200 sample rows from the local file "209", builds a
    PredictRequest from the first row, writes it as a PredictionLog record to
    "tf_serving_warmup_requests", then sends that request 1000 times to
    FLAGS.server and prints the elapsed time and the last prediction.

    Raises:
        ValueError: if the sample file contains no rows.
    """
    import time

    channel = grpc.insecure_channel(FLAGS.server)
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
    print(FLAGS.model)

    requests = _load_sample_rows("209", 200)
    if not requests:
        raise ValueError('sample file "209" contains no rows')

    num = 1  # batch size of the warmup request
    with tf.io.TFRecordWriter("tf_serving_warmup_requests") as writer:
        for i in range(0, 1):
            # Use the --model flag so the warmup record and the benchmark
            # target the same model (the flag default is the previously
            # hard-coded name).
            request = _build_warmup_request(requests[i], FLAGS.model, num)
            log = prediction_log_pb2.PredictionLog(
                predict_log=prediction_log_pb2.PredictLog(request=request))
            writer.write(log.SerializeToString())
            print(request)
            print(requests[i]['userKeywordHistory'])

    repeats = 1000
    time_start = time.perf_counter()
    pending = [stub.Predict.future(request, 30.) for _ in range(repeats)]
    # Wait for every response before stopping the clock; otherwise only the
    # time needed to dispatch the calls is measured.
    predictions = [future.result() for future in pending]
    time_elapsed_sec = time.perf_counter() - time_start
    print('Total elapsed time: {} seconds'.format(time_elapsed_sec))
    print('Time for batch size {} repeated {} times'.format(num, repeats))
    print('Average latency per batch: {} seconds'.format(time_elapsed_sec / repeats))
    print(pending[-1])
    prediction = predictions[-1]
    print(prediction)


if __name__ == "__main__":
    main()
导出模型时的代码如下:
def export_model(model, export_dir, checkpoint_path,
                 warmup_requests_path='/data/supeihuang/din_209_data/data_item/tf_serving_warmup_requests'):
    """Export the model to SavedModel format together with the warmup file.

    Args:
        model: Estimator object providing `export_savedmodel`.
        export_dir: directory to export the model to.
        checkpoint_path: checkpoint to export, passed through to
            `export_savedmodel`.
        warmup_requests_path: path of the generated warmup file; it is passed
            as the `tf_serving_warmup_requests` entry of `assets_extra`.

    Returns:
        Whatever `model.export_savedmodel` returns.
    """
    # 54 scalar features; the placeholder name equals the feature key.
    feature_name = [
        'userEntrance', 'userRequestTime', 'userRequestWeek', 'userOs', 'userApn',
        'userUa', 'userMode', 'userProvince', 'userCity', 'userCityLevel',
        'userMarr', 'userAge', 'userGestat_week', 'userAgeRange', 'userBage',
        'userAppV', 'userCliN_Inc', 'userShoN_Inc', 'userBotActCt', 'userTotalTime',
        'userView2BottomTimes', 'userEffTimes', 'userFirstRequest', 'userAppLTag',
        'itemAlgSource', 'itemTexL', 'itemKwN', 'itemTitL', 'itemTwN', 'itemImgN',
        'itemSour', 'itemCreT', 'itemCliN_Inc', 'itemShoN_Inc', 'itemRevi',
        'itemColN', 'itemShare', 'itemVreN', 'itemLireN', 'itemLike',
        'itemEffUsers', 'itemView2BottomTimes', 'itemTotalTime', 'itemBotSum',
        'itemMt', 'itemContentH', 'itemCtr', 'itemAvsT', 'itemFiR',
        'itemTimeScore', 'itemBotSumCliR',
        'itemSexW', 'itemSuperstiW', 'itemLowTitleW',
    ]
    # Features fed as doubles; every other scalar feature is int32. Built once
    # here instead of on every loop iteration.
    double_feature = {"itemContentH", "itemCtr", "itemAvsT", "itemFiR", "itemTimeScore",
                      "itemBotSumCliR", "itemSexW", "itemSuperstiW", "itemLowTitleW"}
    serving_features = {}
    for item in feature_name:
        dtype = tf.double if item in double_feature else tf.int32
        serving_features[item] = tf.placeholder(dtype, [None, ], name=item)

    # 15 more inputs whose placeholder name differs from the feature key.
    serving_features1 = {
        "itemKtW": tf.placeholder(tf.int32, [None, ], name='keyword'),
        "itemKtW2": tf.placeholder(tf.int32, [None, ], name='keyword2'),
        "itemTag1": tf.placeholder(tf.int32, [None, ], name='tag1'),
        "itemTag2": tf.placeholder(tf.int32, [None, ], name='tag2'),
        "itemTag3": tf.placeholder(tf.int32, [None, ], name='tag3'),
        "itemKs1": tf.placeholder(tf.int32, [None, ], name='ks1'),
        "itemKs2": tf.placeholder(tf.int32, [None, ], name='ks2'),
        "userKeywordHistory": tf.placeholder(tf.int32, [None, None], name='hist_keyword'),
        "userKeyword2History": tf.placeholder(tf.int32, [None, None], name='hist_keyword2'),
        "userTag1History": tf.placeholder(tf.int32, [None, None], name='hist_tag1'),
        "userTag2History": tf.placeholder(tf.int32, [None, None], name='hist_tag2'),
        "userTag3History": tf.placeholder(tf.int32, [None, None], name='hist_tag3'),
        "userKs1History": tf.placeholder(tf.int32, [None, None], name='hist_ks1'),
        "userKs2History": tf.placeholder(tf.int32, [None, None], name='hist_ks2'),
        "userHisL": tf.placeholder(tf.int32, [None, ], name='sl'),
    }
    serving_features.update(serving_features1)

    # Build a serving_input_receiver_fn that expects the raw feature tensors.
    example_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(serving_features)

    # Export as a TensorFlow SavedModel; the warmup file is handed over via
    # assets_extra under the name TF-Serving looks for.
    return model.export_savedmodel(
        export_dir,
        example_input_fn,
        checkpoint_path=checkpoint_path,
        assets_extra={'tf_serving_warmup_requests': warmup_requests_path},
    )