离散特征
离散特征之间做组合
Wide部分不输入连续值特征,至少在W&D的paper里面是这样使用的。
raw input+embedding处理
对连续值之外的特征(即离散特征)做embedding处理,这里都是策略特征,就是乘以个embedding-matrix。在TensorFlow中对应的接口是tf.feature_column.embedding_column。
注:训练时,Wide部分用FTRL来训练;Deep部分用AdaGrad来训练。
代码如下:
此处生成WDL文件
import tensorflow as tf
# Pipeline overview:
# - 1. Build the input data from TFRecords
# - 2. Declare the feature columns used by the model
# - 3. Train the model and run evaluation / prediction
# Feature names, in the order the parsed tensors are zipped into the feature dict.
FEATURE_COLUMN = ['channel_id', 'vector', 'user_weights', 'article_weights']
class WDL(object):
    """Train, evaluate and export a Wide & Deep ranking model.

    The work is split into two steps:

    * ``get_tfrecords_data`` builds the ``tf.data`` input pipeline from a
      TFRecords file.
    * ``train_eval`` declares the wide / deep feature columns, trains and
      evaluates a ``DNNLinearCombinedClassifier`` and exports it as a
      SavedModel.
    """

    def __init__(self):
        pass

    @staticmethod
    def get_tfrecords_data():
        """Build the input dataset consumed by the estimator.

        :return: a dataset of ``(feature_dict, label)`` pairs, batched by 64
            and repeated 10 times. ``feature_dict`` is keyed by the names in
            ``FEATURE_COLUMN``.
        """

        def parse_example_function(example):
            """Parse one serialized example into model features and a label.

            :param example: a serialized ``tf.train.Example`` record
            :return: ``(feature_dict, label)``
            """
            # Parsing schema: an int64 label and the feature vector as raw bytes.
            features = {
                'label': tf.FixedLenFeature([], tf.int64),
                'feature': tf.FixedLenFeature([], tf.string)
            }
            label_feature = tf.parse_single_example(example, features)

            # The bytes are decoded as float64, cast to float32 and viewed as
            # a single row of 121 values.
            f = tf.decode_raw(label_feature['feature'], tf.float64)
            feature = tf.reshape(tf.cast(f, tf.float32), [1, 121])

            # Row layout: [0] channel id, [1:101] vector,
            # [101:111] user weights, [111:121] article weights.
            channel_id = tf.cast(tf.slice(feature, [0, 0], [1, 1]), tf.int32)

            # Collapse each group to its mean. The serving client also sends
            # means for these three features, so training must use the same
            # statistic (a sum here would skew train vs. serve inputs).
            vector = tf.reduce_mean(tf.slice(feature, [0, 1], [1, 100]), axis=1)
            user_weights = tf.reduce_mean(tf.slice(feature, [0, 101], [1, 10]), axis=1)
            article_weights = tf.reduce_mean(tf.slice(feature, [0, 111], [1, 10]), axis=1)

            # Name the four feature tensors.
            data = [channel_id, vector, user_weights, article_weights]
            feature_dict = dict(zip(FEATURE_COLUMN, data))

            label = tf.cast(label_feature['label'], tf.int32)
            return feature_dict, label

        # Read the TFRecords file, parse every record, then batch and repeat.
        dataset = tf.data.TFRecordDataset(['datas/train_ctr_20190605.tfrecords'])
        dataset = dataset.map(parse_example_function)
        dataset = dataset.batch(64)
        dataset = dataset.repeat(10)
        return dataset

    def train_eval(self):
        """Train, evaluate and export the Wide & Deep model.

        Checkpoints are written to ``./ckpt/wide_and_deep/`` and the
        SavedModel is exported under ``./serving_model/wdl/``. The evaluation
        metrics are printed.

        :return: None
        """
        # Wide side: channel_id is already an integer category id, so an
        # identity categorical column is used; num_buckets is required.
        channel_id = tf.feature_column.categorical_column_with_identity('channel_id', num_buckets=25)
        wide_columns = [channel_id]

        # Deep side: the categorical id must be embedded
        # (tf.feature_column.embedding_column); the rest are numeric columns.
        vector = tf.feature_column.numeric_column('vector')
        user_weights = tf.feature_column.numeric_column('user_weights')
        article_weights = tf.feature_column.numeric_column('article_weights')
        deep_columns = [tf.feature_column.embedding_column(channel_id, dimension=25),
                        vector, user_weights, article_weights]

        # Build the combined linear + DNN classifier.
        model = tf.estimator.DNNLinearCombinedClassifier(model_dir="./ckpt/wide_and_deep/",
                                                         linear_feature_columns=wide_columns,
                                                         dnn_feature_columns=deep_columns,
                                                         dnn_hidden_units=[1024, 512, 256])

        # NOTE: steps=1 runs a single training step, and evaluation reuses
        # the same input function (same file) as training.
        model.train(WDL.get_tfrecords_data, steps=1)
        result = model.evaluate(WDL.get_tfrecords_data)
        # Metrics recorded from an earlier run of this script, kept as an
        # example of the returned dict:
        # {'accuracy': 0.9046435, 'accuracy_baseline': 0.9046434, 'auc': 0.57673496, 'auc_precision_recall': 0.12006451, 'average_loss': 0.38107494, 'label/mean': 0.095356554, 'loss': 24.18823, 'precision': 0.0, 'prediction/mean': 0.2390636, 'recall': 0.0, 'global_step': 1}
        print(result)

        # Export the model for serving: derive the tf.Example parsing spec
        # from all feature columns and export a SavedModel that parses
        # serialized examples.
        columns = wide_columns + deep_columns
        feature_spec = tf.feature_column.make_parse_example_spec(columns)
        serving_input_receiver_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
        model.export_savedmodel("./serving_model/wdl/", serving_input_receiver_fn)
if __name__ == "__main__":
    # Script entry point: run training, evaluation and export end to end.
    WDL().train_eval()
TensorFlow Serving是一种灵活的高性能服务系统,适用于机器学习模型,专为生产环境而设计。TensorFlow Serving可以轻松部署新算法和实验,同时保持相同的服务器架构和API。TensorFlow Serving提供与TensorFlow模型的开箱即用集成,但可以轻松扩展以提供其他类型的模型和数据。
TensorFlow Serving部署
docker pull tensorflow/serving
docker images
docker run -p 8501:8501 -p 8500:8500 --mount type=bind,source=/home/ubuntu/detectedmodel/commodity,target=/models/commodity -e MODEL_NAME=commodity -t tensorflow/serving
说明:
-p 8501:8501 为端口映射,-p 主机端口:docker容器程序(tf serving)使用端口,访问主机8501端口就相当于访问了tf serving程序的8501端口
tf serving 使用8501端口对外提供HTTP服务,使用8500对外提供gRPC服务,这里同时开放了两个端口的使用
--mount type=bind,source=/home/ubuntu/detectedmodel/commodity,target=/models/commodity 为文件映射,将主机(source)的模型文件映射到docker容器程序(target)的位置,以便tf serving使用模型,target参数为/models/我的模型
-e MODEL_NAME=commodity设置了一个环境变量,名为MODEL_NAME,此变量被tf serving读取,用来按名字寻找模型,与上面target参数中我的模型对应
-t 为tf serving创建一个伪终端,供程序运行
tensorflow/serving为镜像名
wdl模型服务运行
docker run -p 8501:8501 -p 8500:8500 --mount type=bind,source=/root/toutiao_project/reco_sys/server/models/serving_model/wdl,target=/models/wdl -e MODEL_NAME=wdl -t tensorflow/serving
itcast:~$ docker ps
import os
import sys

# Put the directory two levels above this file on sys.path BEFORE the
# project imports below; inserting it afterwards (as the original did) cannot
# help resolve ``server`` when this file is run directly as a script.
# NOTE(review): assumes the ``server`` package lives in BASE_DIR - confirm.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
print(BASE_DIR)
sys.path.insert(0, os.path.join(BASE_DIR))

import tensorflow as tf
from grpc.beta import implementations
from tensorflow_serving.apis import prediction_service_pb2_grpc
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import classification_pb2
import grpc
from server.utils import HBaseUtils
from server import pool
def wdl_sort_service():
    """Score a fixed set of articles with the served Wide & Deep model.

    Reads the user's features and each article's features from HBase, builds
    one ``tf.train.Example`` per article, sends them in a single Classify
    request to the gRPC endpoint ``127.0.0.1:8500`` (model name ``wdl``) and
    prints the response.

    The user id, channel id and article ids are hard-coded values. If the
    user features cannot be read (or are empty) no request is sent.

    :return: None
    """
    # numpy is used for the means below; imported locally because the module
    # does not import it at the top level.
    import numpy as np

    hbu = HBaseUtils(pool)

    # 1. Read the user's features from the feature store.
    try:
        # NOTE(review): eval() executes whatever text is stored in HBase;
        # consider ast.literal_eval if the stored value is a plain literal.
        user_feature = eval(hbu.get_table_row('ctr_feature_user',
                                              '{}'.format(1115629498121846784).encode(),
                                              'channel:{}'.format(18).encode()))
    except Exception:
        # Best effort: treat any lookup / parse failure as "no user features".
        user_feature = []

    if not user_feature:
        return None

    # The user-weight feature is the mean of the user's feature values. It is
    # the same for every article, so compute it once outside the loop.
    user_weight = float(np.mean(user_feature))

    # 2. Read each article's features and build one Example per article.
    examples = []
    for article_id in [17749, 17748, 44371, 44368]:
        try:
            # NOTE(review): same eval() caveat as above.
            article_feature = eval(hbu.get_table_row('ctr_feature_article',
                                                     '{}'.format(article_id).encode(),
                                                     'article:{}'.format(article_id).encode()))
        except Exception:
            # Fall back to an all-zero feature row of length 111.
            article_feature = [0.0] * 111

        # Row layout as indexed here: [0] channel id, [1:11] article
        # weights, [11:] vector.
        channel_id = int(article_feature[0])
        article_weight = float(np.mean(article_feature[1:11]))
        vector = float(np.mean(article_feature[11:]))

        # Feature keys must match the model's feature columns exactly
        # ('user_weights' was previously misspelled as 'user_weigths').
        example = tf.train.Example(features=tf.train.Features(feature={
            "channel_id": tf.train.Feature(int64_list=tf.train.Int64List(value=[channel_id])),
            "vector": tf.train.Feature(float_list=tf.train.FloatList(value=[vector])),
            'user_weights': tf.train.Feature(float_list=tf.train.FloatList(value=[user_weight])),
            'article_weights': tf.train.Feature(float_list=tf.train.FloatList(value=[article_weight])),
        }))
        examples.append(example)

    # 3. Send all examples in one classification request (10 second timeout).
    with grpc.insecure_channel('127.0.0.1:8500') as channel:
        stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
        request = classification_pb2.ClassificationRequest()
        request.model_spec.name = 'wdl'
        request.input.example_list.examples.extend(examples)
        response = stub.Classify(request, 10.0)
        print(response)
    return None
if __name__ == "__main__":
    # Script entry point: send one demo ranking request to the served model.
    wdl_sort_service()