tensorflow tf-serving 进行warmup设置

最近发现模型在更新的一瞬间容易产生超时的问题,于是了解了一下 tf-serving 中的 warmup 机制:它在模型启动时加载 ${model}/${version}/assets.extra/tf_serving_warmup_requests 文件来实现热启动,从而使模型更新时不易产生超时问题。

首先根据自己的模型字段进行编写形成tf_serving_warmup_requests文件,在导出模型时和warmup文件一起导出

以下是我warmup文件生成代码

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author: supeihuang
# Time: 2019/9/5 9:33

import ast
import datetime

import grpc
import numpy
import tensorflow as tf
import tensorflow as tf
from grpc.beta import implementations
from tensorflow_serving.apis import model_pb2
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_log_pb2
from tensorflow_serving.apis import prediction_service_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc


# gRPC endpoint (host:port) of the TF-Serving instance queried by main().
tf.app.flags.DEFINE_string('server', '47.93.217.40:31220',
                           'PredictionService host:port')


# Name the model was deployed under in TF-Serving.
tf.app.flags.DEFINE_string('model', 'din_pregrant_posts_3',
                           'Model name.')
FLAGS = tf.app.flags.FLAGS

# Column order of the sample file "209"; index j in a parsed row maps to
# _CSV_COLUMNS[j].  Trailing "#N" comments count the fields per line.
_CSV_COLUMNS =["labels", "userId", "itemId","userEntrance", "userRequestTime", "userRequestWeek", "userOs", "userApn","userUa",#9
                "userMode", "userProvince", "userCity", "userCityLevel", "userMarr", "userAge","userGestat_week","userAgeRange", "userBage", "userAppV",#10
                "userCliN_Inc", "userShoN_Inc", "userBotActCt","userTotalTime", "userView2BottomTimes", "userEffTimes", "userFirstRequest", "userAppLTag", "userHisL",#9
                 "itemAlgSource", "itemTexL", "itemKwN", "itemTitL", "itemTwN", "itemImgN", "itemSour", "itemCreT", "itemCliN_Inc","itemShoN_Inc",#10
                 "itemRevi", "itemColN","itemShare", "itemVreN", "itemLireN", "itemLike", "itemEffUsers", "itemView2BottomTimes", "itemTotalTime","itemBotSum","itemMt",#11
                "itemContentH", "itemCtr", "itemAvsT", "itemFiR", "itemTimeScore", "itemBotSumCliR", "itemSexW","itemSuperstiW","itemLowTitleW",#9
                "itemTtP","itemKtW", "itemKtW2", "itemTag1", "itemTag2", "itemTag3", "itemKs1", "itemKs2",#7
                "userItemHistory", "userKeywordHistory", "userKeyword2History","userTag1History", "userTag2History", "userTag3History", "userKs1History", "userKs2History","userKtW","userTtP"]#10

# History columns stored as stringified sequences that need to be parsed
# into lists (userItemHistory is optional).  NOTE(review): name keeps the
# original "SPILIT" spelling ("SPLIT" intended) — unused in the visible code,
# but renaming could break callers elsewhere.
_COLUMNS_NEED_SPILIT = ["userItemHistory", "userKeywordHistory", "userKeyword2History", "userTag1History",
                        "userTag2History", "userTag3History", "userKs1History", "userKs2History"]

def main():
    """Build a ``tf_serving_warmup_requests`` file and benchmark the endpoint.

    Steps:
      1. Parse up to 200 sample rows from the local file ``"209"``.
      2. Serialize one row as a ``PredictionLog`` into
         ``tf_serving_warmup_requests`` (the file TF-Serving loads from
         ``assets.extra/`` to warm the model up).
      3. Send the same request 1000 times to FLAGS.server and report the
         average latency.
    """
    # gRPC stub for the remote TF-Serving PredictionService.
    channel = grpc.insecure_channel(FLAGS.server)
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
    print(FLAGS.model)

    # Columns whose cell is a stringified sequence (e.g. "(1, 2, 3)") that
    # must be parsed into a list; an empty cell is kept as ''.
    history_parse_cols = ['userKeywordHistory', 'userKeyword2History',
                          'userTag1History', 'userTag2History', 'userTag3History',
                          'userKs1History', 'userKs2History', 'userTtP', 'userKtW']

    requests = []
    # 'with' guarantees the file is closed; the original used open()/finally.
    with open("209", 'r', encoding='UTF-8') as sample_file:
        for text_line in sample_file:
            if len(requests) >= 200:
                break
            # ast.literal_eval only accepts Python literals — safer than the
            # original eval() on file content.
            row = ast.literal_eval(text_line)
            sample = {_CSV_COLUMNS[j]: [row[j]] for j in range(76)}
            for col in history_parse_cols:
                cell = sample[col][0]
                # Original compared with "is not ''" (identity), which is
                # unreliable for strings; use equality instead.
                if cell != '':
                    sample[col] = [list(ast.literal_eval(cell))]
                else:
                    sample[col] = cell
            requests.append(sample)

    # Scalar int32 features, shape [num].
    int32_cols = [
        "userEntrance", "userRequestTime", "userRequestWeek", "userOs", "userApn", "userUa",
        "userMode", "userProvince", "userCity", "userCityLevel",
        "userMarr", "userAge", "userGestat_week", "userAgeRange", "userBage", "userAppV",
        "userCliN_Inc", "userShoN_Inc", "userBotActCt", "userTotalTime",
        "userView2BottomTimes", "userEffTimes", "userFirstRequest", "userAppLTag",
        "itemAlgSource", "itemTexL", "itemKwN", "itemTitL", "itemTwN", "itemImgN",
        "itemSour", "itemCreT", "itemCliN_Inc", "itemShoN_Inc", "itemRevi", "itemColN",
        "itemShare", "itemVreN", "itemLireN", "itemLike",
        "itemEffUsers", "itemView2BottomTimes", "itemTotalTime", "itemBotSum", "itemMt",
        "itemKtW", "itemKtW2", "itemTag1", "itemTag2", "itemTag3", "itemKs1", "itemKs2",
        "userHisL",
    ]
    # Scalar double features, shape [num].
    double_cols = [
        "itemContentH", "itemCtr", "itemAvsT", "itemFiR", "itemTimeScore",
        "itemBotSumCliR", "itemSexW", "itemSuperstiW", "itemLowTitleW",
    ]
    # Variable-length history features, shape [num, userHisL].
    # (userTtP / userKtW were commented out in the original request too.)
    history_cols = [
        "userKeywordHistory", "userKeyword2History", "userTag1History",
        "userTag2History", "userTag3History", "userKs1History", "userKs2History",
    ]

    num = 1  # batch size replicated into every tensor
    sample = requests[0]
    inputs = {}
    for name in int32_cols:
        inputs[name] = tf.make_tensor_proto(sample[name] * num, shape=[num],
                                            dtype=tf.int32)
    for name in double_cols:
        inputs[name] = tf.make_tensor_proto(sample[name] * num, shape=[num],
                                            dtype=tf.double)
    # All histories in one row share the length recorded in userHisL.
    hist_len = sample['userHisL'][0]
    for name in history_cols:
        inputs[name] = tf.make_tensor_proto(sample[name] * num,
                                            shape=[num, hist_len],
                                            dtype=tf.int32)

    request = predict_pb2.PredictRequest(
        # Use FLAGS.model instead of the original hard-coded name (same
        # default), so the warmup request matches the served model.
        model_spec=model_pb2.ModelSpec(name=FLAGS.model,
                                       signature_name="serving_default"),
        inputs=inputs)

    with tf.io.TFRecordWriter("tf_serving_warmup_requests") as writer:
        log = prediction_log_pb2.PredictionLog(
            predict_log=prediction_log_pb2.PredictLog(request=request))
        writer.write(log.SerializeToString())
    print(request)
    print(sample['userKeywordHistory'])

    # Benchmark: synchronous Predict so each call completes before the next —
    # the original fired .future() calls without awaiting them, so the
    # measured latency was meaningless.
    time_start = datetime.datetime.utcnow()
    for _ in range(1000):
        response = stub.Predict(request, 30.0)
    time_end = datetime.datetime.utcnow()
    time_elapsed_sec = (time_end - time_start).total_seconds()

    print('Total elapsed time: {} seconds'.format(time_elapsed_sec))
    print('Time for batch size {} repeated {} times'.format(1, 1000))
    print('Average latency per batch: {} seconds'.format(time_elapsed_sec / 1000))
    print(response)

# Script entry point: generate the warmup file, then benchmark the endpoint.
if __name__ == "__main__":
    main()

导出模型时的代码如下

def export_model(model, export_dir, checkpoint_path):
    """Export to SavedModel format, bundling the warmup file in assets.extra.

    Args:
      model: Estimator object to export.
      export_dir: directory to export the SavedModel to.
      checkpoint_path: checkpoint to export weights from.

    Returns:
      The path of the exported SavedModel directory (from export_savedmodel).
    """
    # 54 scalar (per-example) features.
    feature_name = [ 'userEntrance', 'userRequestTime', 'userRequestWeek', 'userOs', 'userApn', 'userUa','userMode', 'userProvince', 'userCity', 'userCityLevel',
                     'userMarr', 'userAge','userGestat_week', 'userAgeRange', 'userBage', 'userAppV', 'userCliN_Inc', 'userShoN_Inc', 'userBotActCt', 'userTotalTime',
                    'userView2BottomTimes', 'userEffTimes', 'userFirstRequest', 'userAppLTag', 'itemAlgSource', 'itemTexL', 'itemKwN', 'itemTitL', 'itemTwN','itemImgN',
                     'itemSour', 'itemCreT', 'itemCliN_Inc', 'itemShoN_Inc', 'itemRevi', 'itemColN', 'itemShare', 'itemVreN', 'itemLireN', 'itemLike',
                    'itemEffUsers', 'itemView2BottomTimes','itemTotalTime', 'itemBotSum','itemMt','itemContentH', 'itemCtr', 'itemAvsT', 'itemFiR', 'itemTimeScore','itemBotSumCliR',
                     'itemSexW','itemSuperstiW', 'itemLowTitleW']

    # Features fed as tf.double; every other scalar feature is tf.int32.
    # Hoisted out of the loop (the original rebuilt this list on every
    # iteration) and made a frozenset for O(1) membership tests.
    double_feature = frozenset(["itemContentH", "itemCtr", "itemAvsT", "itemFiR", "itemTimeScore",
                                "itemBotSumCliR", "itemSexW", "itemSuperstiW", "itemLowTitleW"])

    serving_features = {}
    for item in feature_name:
        dtype = tf.double if item in double_feature else tf.int32
        serving_features[item] = tf.placeholder(dtype, [None, ], name=item)

    # Item-side id features plus variable-length user histories
    # (shape [batch, history_len]); userHisL carries each history's length.
    serving_features1 = {
        # "itemTtp": tf.placeholder(tf.int32, [None, ], name='itemTtp'),
        "itemKtW": tf.placeholder(tf.int32, [None, ], name='keyword'),
        "itemKtW2": tf.placeholder(tf.int32, [None, ], name='keyword2'),
        "itemTag1": tf.placeholder(tf.int32, [None, ], name='tag1'),
        "itemTag2": tf.placeholder(tf.int32, [None, ], name='tag2'),
        "itemTag3": tf.placeholder(tf.int32, [None, ], name='tag3'),
        "itemKs1": tf.placeholder(tf.int32, [None, ], name='ks1'),
        "itemKs2": tf.placeholder(tf.int32, [None, ], name='ks2'),

        "userKeywordHistory": tf.placeholder(tf.int32, [None, None], name='hist_keyword'),
        "userKeyword2History": tf.placeholder(tf.int32, [None, None], name='hist_keyword2'),
        "userTag1History": tf.placeholder(tf.int32, [None, None], name='hist_tag1'),
        "userTag2History": tf.placeholder(tf.int32, [None, None], name='hist_tag2'),
        "userTag3History": tf.placeholder(tf.int32, [None, None], name='hist_tag3'),
        "userKs1History": tf.placeholder(tf.int32, [None, None], name='hist_ks1'),
        "userKs2History": tf.placeholder(tf.int32, [None, None], name='hist_ks2'),
        # "userTtP": tf.placeholder(tf.int32, [None, None], name='userTtP'),
        # "userKtW": tf.placeholder(tf.int32, [None, None], name='userKtW'),
        "userHisL": tf.placeholder(tf.int32, [None, ], name='sl')
    }

    serving_features.update(serving_features1)
    # Build a serving_input_receiver_fn that expects the raw feature tensors.
    example_input_fn = (tf.estimator.export.build_raw_serving_input_receiver_fn(serving_features))
    # Export as a SavedModel; assets_extra ships the warmup file so TF-Serving
    # replays it on model load (${model}/${version}/assets.extra/...).
    return model.export_savedmodel(
        export_dir, example_input_fn, checkpoint_path=checkpoint_path,
        assets_extra={'tf_serving_warmup_requests': '/data/supeihuang/din_209_data/data_item/tf_serving_warmup_requests'})

 

你可能感兴趣的:(tensorflow tf-serving 进行warmup设置)