由于自己编译,版本很重要。package下载出错时,多运行几遍。
bazel安装:
bazel 0.16.1
bash bazel-0.16.1-installer-linux-x86_64.sh
export PATH="$PATH:$HOME/bin"
tf serving安装:
下载 tensorflow-serving 1.12.0 源码包(serving-1.12.0.zip)并解压,后续命令在解压后的目录中执行。
在WORKSPACE中 http_archive( 行前添加:
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
#编译cpu版本:
bazel build tensorflow_serving/model_servers:tensorflow_model_server
#编译gpu版本:
export TF_NEED_CUDA=1
export TF_CUDA_VERSION=10.0
export TF_CUDNN_VERSION=7
bazel build --config=cuda --copt="-fPIC" tensorflow_serving/model_servers:tensorflow_model_server
如果nccl报错:
export NCCL_HDR_PATH=/usr/local/nccl-2.4/include
export NCCL_INSTALL_PATH=/usr/local/nccl-2.4/lib
export TF_NCCL_VERSION=2.4.2
export LD_LIBRARY_PATH=/usr/local/nccl-2.4/lib:$LD_LIBRARY_PATH
运行服务:
bazel-bin/tensorflow_serving/model_servers/tensorflow_model_server --port=9000 --rest_api_port=9001 --model_name=test --model_base_path=/mnt/ad_relevance/reimgdata/
测试:
curl http://localhost:9001/v1/models/test # test为模型名称
import sys
import cv2
import time
import numpy as np
import tensorflow as tf
from grpc.beta import implementations
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2
num_per_request = 300
size = 299
central_fraction = 0.875
HOST = "10.22.151.155"
PORT = 9000
channel = implementations.insecure_channel(HOST, PORT)
stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
def preprocess(img):
    """Center-crop, resize and BGR->RGB convert one image for the CNN signature.

    Keeps the central `central_fraction` of the image, resizes to
    (size, size) with bilinear interpolation, then reverses the channel
    axis (cv2.imread yields BGR; the model presumably expects RGB --
    TODO confirm against the exported graph).

    Args:
        img: HxWxC array as returned by cv2.imread.

    Returns:
        (size, size, C) uint8 array with channel order reversed.
    """
    h, w, _ = img.shape
    # Symmetric central crop: drop (1 - central_fraction)/2 on each side.
    # (The original end-index arithmetic bbox_start + h - bbox_start*2
    # simplifies to h - bbox_start; debug prints removed.)
    off_h = int((h - h * central_fraction) / 2)
    off_w = int((w - w * central_fraction) / 2)
    img = img[off_h:h - off_h, off_w:w - off_w, :]
    img = cv2.resize(img, (size, size), interpolation=cv2.INTER_LINEAR)
    img = img.astype(np.uint8)
    # BGR -> RGB.
    return img[:, :, ::-1]
def get(name, input_ids=0, input_mask=0, segment_ids=0, pos_input_ids=0, pos_input_mask=0,
        pos_segment_ids=0, pos_image=0.0 ):
    """Send one Predict RPC to model 'test' and return the raw PredictResponse.

    `name` selects the exported signature: "serving_default" sends the three
    BERT-style integer tensors (input_ids / input_mask / segment_ids); any
    other name (e.g. "serving_cnn") sends `pos_image` under 'image_list'.

    The pos_input_* parameters are currently unused -- their wiring is
    commented out below.  1200 is the deadline argument to stub.Predict
    (grpc.beta API); presumably seconds -- TODO confirm.
    Raises on RPC failure (caller `loop_request` counts these as errors).
    """
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'test'
    request.model_spec.signature_name = name
    # request.inputs['pos_input_ids'].CopyFrom(tf.contrib.util.make_tensor_proto(pos_input_ids))
    # request.inputs['pos_input_mask'].CopyFrom(tf.contrib.util.make_tensor_proto(pos_input_mask))
    # request.inputs['pos_segment_ids'].CopyFrom(tf.contrib.util.make_tensor_proto(pos_segment_ids))
    if name=="serving_default":
        request.inputs['input_ids'].CopyFrom(tf.contrib.util.make_tensor_proto(input_ids))
        request.inputs['input_mask'].CopyFrom(tf.contrib.util.make_tensor_proto(input_mask))
        request.inputs['segment_ids'].CopyFrom(tf.contrib.util.make_tensor_proto(segment_ids))
    else:
        request.inputs['image_list'].CopyFrom(tf.contrib.util.make_tensor_proto(pos_image))
    result = stub.Predict(request, 1200)
    #print(result.outputs["image_emb"].float_val)
    return result
def loop_request(func, **args):
    """Call func(**args) `total` times (module global) and report timing.

    Each failing call has its exception printed and counted; the loop keeps
    going (best-effort benchmark).  At the end prints the average seconds
    per call and the error rate.  A zero-iteration run (total == 0) is
    reported without dividing by zero.
    """
    # Reading the module global needs no `global` declaration; the
    # trailing `continue` in the except branch was a no-op and is gone.
    start = time.time()
    error = 0
    for i in range(total):
        print(i)
        try:
            func(**args)
        except Exception as e:
            print(e)
            error += 1
    elapsed = time.time() - start
    denom = total if total else 1  # guard against total == 0
    print("time_rate: ", elapsed / denom, "error_rate: ", error / denom)
def test_cnn():
    """Benchmark the 'serving_cnn' signature with a batch built from one image."""
    # Load and preprocess a single sample image, then replicate it into a
    # (num_per_request, size, size, 3) batch for each request.
    raw = cv2.imread("/data00/home/huangqingkang/imgs/1aacb000b0d93c755f995")
    single = np.expand_dims(preprocess(raw), 0)
    batch = np.tile(single, [num_per_request, 1, 1, 1])
    loop_request(get, name="serving_cnn", pos_image=batch)
def test_bert():
    """Benchmark the 'serving_default' (BERT-style) signature with zeroed inputs."""
    # NOTE(review): batch here is num_per_request + 1 rows, unlike the CNN
    # test -- looks intentional but worth confirming.
    shape = (num_per_request + 1, 32)
    input_ids = np.zeros(shape, dtype=np.int32)
    input_mask = np.zeros(shape, dtype=np.int32)
    segment_ids = np.zeros(shape, dtype=np.int32)
    # Placeholder image tensor; the serving_default branch of get() does
    # not put it into the request.
    pos_image = np.array([0], dtype=np.uint8)
    loop_request(get, name="serving_default", input_ids=input_ids,
                 input_mask=input_mask, segment_ids=segment_ids,
                 pos_image=pos_image)
def get_cnn_emb(pos_image):
    """Return the image embedding (repeated float field) for a batch of images.

    pos_image: array shaped like test_cnn's batch, i.e.
    (N, size, size, 3) uint8 -- TODO confirm against the exported signature.
    """
    result = get(name="serving_cnn", pos_image=pos_image)
    # Bug fix: the PredictResponse field is `outputs` (see its use in get()),
    # not `output` -- the old code raised AttributeError on every call.
    return result.outputs["image_emb"].float_val
# def get_bert_emb(**args):
# CLI: python <script> <mode: cnn|bert> <total_requests> <num_per_request> <use_fp>
if __name__ == "__main__":
    # Number of Predict RPCs issued by loop_request.
    total = int(sys.argv[2])
    # argv[3] == "0" keeps the default num_per_request (300).
    if int(sys.argv[3]):
        num_per_request = int(sys.argv[3])
    # use_fp is parsed but only referenced by code that is commented out above.
    use_fp = int(sys.argv[4])
    if sys.argv[1] == "cnn":
        test_cnn()
    elif sys.argv[1] == "bert":
        test_bert()
其他:
TensorFlow serving 安装教程与使用(2)
tf_serving-模型训练、导出、部署(解析)
tf43:tensorflow Serving gRPC 部署实例
tensorflow tfserving 部署多个模型、使用不同版本的模型