Inspired by the Transformer's success in natural language processing, BST (Behavior Sequence Transformer) applies a Transformer to extract the hidden information behind a user's behavior sequence; by also taking the order of the sequence into account, it can express user interest more accurately.
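At the core of the model is multi-head self-attention over the behavior sequence: each head computes scaled dot-product attention, softmax(QKᵀ/√d_k)·V, and the heads' outputs are concatenated and projected back to the embedding dimension. The MultiHeadSelfAttention layer in the code below implements this directly, using the script's own hyperparameters (16-dimensional embeddings split across 8 heads).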
import tensorflow as tf
from tensorflow import keras
from utils import *
import numpy as np
EPOCH = 10
BATCH_SIZE = 32
VEC_DIM = 16
DROPOUT_RATE = 0.2
HEAD_NUM = 8
HIDE_SIZE = 32
LAYER_NUM = 3
DNN_LAYERS = [1024, 512, 256]
data, max_user_id, max_item_id = load_data()
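# Note: load_data() / get_all_data() come from utils.py, which is not shown here. Judging
# from how they are used below, get_all_data(data) is assumed to return numpy arrays shaped
# roughly as follows (an assumption, for reference only):
#   user_id_data:     [n_samples, 1]                   user id
#   now_item_id_data: [n_samples, 1]                   candidate (target) item id
#   item_ids_data:    [n_samples, BEHAVIOR_FEAT_NUM]   ids of the items in the behavior sequence
#   rating_ids_data:  [n_samples, BEHAVIOR_FEAT_NUM]   rating ids (0-5) for those items
#   y_data:           [n_samples]                      binary label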
# Number of behavior features (length of the user behavior sequence)
BEHAVIOR_FEAT_NUM = 32
K = tf.keras.backend
class MultiHeadSelfAttention(keras.layers.Layer):
    def __init__(self, embed_dim, num_heads=8):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if embed_dim % num_heads != 0:
            raise ValueError(
                f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
            )
        self.projection_dim = embed_dim // num_heads
        self.query_dense = keras.layers.Dense(embed_dim)
        self.key_dense = keras.layers.Dense(embed_dim)
        self.value_dense = keras.layers.Dense(embed_dim)
        self.combine_heads = keras.layers.Dense(embed_dim)

    def attention(self, query, key, value):
        # Scaled dot-product attention: softmax(Q K^T / sqrt(d_k)) V
        score = tf.matmul(query, key, transpose_b=True)
        key_dim = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(key_dim)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        # [batch_size, seq_len, embed_dim] -> [batch_size, num_heads, seq_len, projection_dim]
        x = tf.reshape(x, shape=(batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs, **kwargs):
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)
        query = self.separate_heads(query, batch_size)
        key = self.key_dense(inputs)
        key = self.separate_heads(key, batch_size)
        value = self.value_dense(inputs)
        value = self.separate_heads(value, batch_size)
        attention, weights = self.attention(query, key, value)
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])  # [batch_size, seq_len, num_heads, projection_dim]
        attention = tf.reshape(attention, shape=(batch_size, -1, self.embed_dim))
        return self.combine_heads(attention)  # [batch_size, seq_len, embed_dim]
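# Optional sanity check (not part of the original flow, uses made-up sizes): a tensor of
# shape [batch_size, seq_len, embed_dim] should come back with the same shape.
# _mha = MultiHeadSelfAttention(embed_dim=VEC_DIM, num_heads=HEAD_NUM)
# _out = _mha(tf.random.normal([2, BEHAVIOR_FEAT_NUM + 1, VEC_DIM]))
# assert _out.shape == (2, BEHAVIOR_FEAT_NUM + 1, VEC_DIM)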
class TransformerBlock(keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = keras.Sequential([keras.layers.Dense(ff_dim, activation="relu"),
                                     keras.layers.Dense(embed_dim)])
        self.layernorm1 = keras.layers.LayerNormalization()
        self.layernorm2 = keras.layers.LayerNormalization()
        self.dropout1 = keras.layers.Dropout(DROPOUT_RATE)
        self.dropout2 = keras.layers.Dropout(DROPOUT_RATE)

    def call(self, inputs, **kwargs):
        # Self-attention -> residual + LayerNorm -> feed-forward -> residual + LayerNorm
        attn_output = self.att(inputs)
        attn_output = self.dropout1(attn_output)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        return self.layernorm2(out1 + ffn_output)
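# Note: TransformerBlock wraps the self-attention layer in the standard encoder pattern,
# but run() below currently uses the bare MultiHeadSelfAttention layer (the block is left
# commented out there). A minimal shape check on made-up sizes, should you want it:
# _block = TransformerBlock(embed_dim=VEC_DIM, num_heads=HEAD_NUM, ff_dim=HIDE_SIZE)
# assert _block(tf.random.normal([2, BEHAVIOR_FEAT_NUM + 1, VEC_DIM])).shape == (2, BEHAVIOR_FEAT_NUM + 1, VEC_DIM)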
def run():
    # Every categorical feature value has already been mapped to a unified id; each row of
    # the data holds the ids of that sample's feature values.
    train_user_id_data, train_now_item_id_data, train_item_ids_data, train_rating_ids_data, train_y_data, \
    test_user_id_data, test_now_item_id_data, test_item_ids_data, test_rating_ids_data, test_y_data = get_all_data(data)

    user_id = keras.Input((1,))
    now_item_id = keras.Input((1,))
    items_ids = keras.Input((BEHAVIOR_FEAT_NUM,))
    ratings_ids = keras.Input((BEHAVIOR_FEAT_NUM,))

    usr_emb = keras.layers.Embedding(max_user_id + 1, VEC_DIM, input_length=1)(user_id)  # [-1,1,vec_dim]
    usr_emb = keras.layers.Flatten()(usr_emb)  # [-1,vec_dim]
    now_item_emb = keras.layers.Embedding(max_item_id + 1, VEC_DIM, input_length=1)(now_item_id)  # [-1,1,vec_dim]

    # TokenAndPositionEmbedding: item embedding * rating embedding as the token embedding,
    # with the candidate item appended, plus a learned position embedding.
    items_emb = keras.layers.Embedding(max_item_id + 1, VEC_DIM, input_length=BEHAVIOR_FEAT_NUM)(
        items_ids)  # [-1,BEA_FEAT_NUM,vec_dim]
    ratings_emb = keras.layers.Embedding(6, VEC_DIM, input_length=BEHAVIOR_FEAT_NUM)(
        ratings_ids)  # [-1,BEA_FEAT_NUM,vec_dim]
    token_embedding = items_emb * ratings_emb  # [-1,BEA_FEAT_NUM,vec_dim]
    token_embedding = keras.layers.concatenate([token_embedding, now_item_emb], axis=1)  # [-1,BEA_FEAT_NUM + 1,vec_dim]
    positions = tf.range(start=0, limit=BEHAVIOR_FEAT_NUM + 1, delta=1)
    positions_embedding = keras.layers.Embedding(BEHAVIOR_FEAT_NUM + 1, VEC_DIM,
                                                 input_length=BEHAVIOR_FEAT_NUM + 1)(positions)
    token_and_positions_embedding = token_embedding + positions_embedding

    # transformer_layer = TransformerBlock(embed_dim=VEC_DIM, num_heads=HEAD_NUM, ff_dim=HIDE_SIZE)(
    #     token_and_positions_embedding)  # [-1,BEA_FEAT_NUM + 1,vec_dim]
    transformer_layer = MultiHeadSelfAttention(embed_dim=VEC_DIM, num_heads=HEAD_NUM)(
        token_and_positions_embedding)  # [-1,BEA_FEAT_NUM + 1,vec_dim]
    transformer_layer = tf.reshape(transformer_layer, shape=(-1, BEHAVIOR_FEAT_NUM + 1, VEC_DIM))
    print(np.shape(transformer_layer))
    transformer_layer = keras.layers.Flatten()(transformer_layer)

    # DNN part: concatenate the user embedding with the flattened Transformer output.
    deep = keras.layers.concatenate([usr_emb, transformer_layer])
    deep = keras.layers.Dropout(DROPOUT_RATE)(deep)
    for units in DNN_LAYERS:
        deep = keras.layers.Dense(units)(deep)
        deep = keras.layers.LeakyReLU()(deep)
        deep = keras.layers.Dropout(DROPOUT_RATE)(deep)
    outputs = keras.layers.Dense(1, activation='sigmoid')(deep)

    model = keras.Model(inputs=[user_id, now_item_id, items_ids, ratings_ids], outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer=keras.optimizers.Adam(0.001), metrics=[keras.metrics.AUC()])

    tbCallBack = keras.callbacks.TensorBoard(log_dir='./logs',
                                             histogram_freq=0,
                                             write_graph=True,
                                             write_images=True,
                                             embeddings_freq=0,
                                             embeddings_metadata=None)
    model.fit([train_user_id_data, train_now_item_id_data, train_item_ids_data, train_rating_ids_data], train_y_data,
              batch_size=BATCH_SIZE, epochs=EPOCH, verbose=2,
              validation_data=(
                  [test_user_id_data, test_now_item_id_data, test_item_ids_data, test_rating_ids_data], test_y_data),
              callbacks=[tbCallBack], workers=4)
run()