① 单个ID(用户、物品)的embedding
② 融入个人历史信息的embedding(一阶连通性)
③ 利用整体交互图的embedding(高阶连通性)
(PS:模型概述:模型设置 K 个意图,每个意图都对应一个子图。在 K 个子图上分别学习用户和物品的嵌入,最后组合起来形成最终的嵌入。)
定义了一组得分矩阵 $\{S_k \mid \forall k \in \{1,\dots,K\}\}$。对于其中的意图感知矩阵 $S_k$,每个条目 $S_k(u,i)$ 表示用户 u 和项目 i 之间的交互。
$\mathbf{u}_k^{(1)}$ 表示用户 u 在子图 k 上的 1 阶聚合信息,中间使用了邻居路由机制来迭代更新。
在GCN的某一层中,其迭代更新的过程为:子图邻域聚合 → 得到用户(物品)嵌入 → 调整子图连边权重 → 子图邻域聚合 → ....
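对于目标交互 (u,i),我们有得分向量 $\{S_k(u,i) \mid \forall k \in \{1,\dots,K\}\}$。为了获得其在所有意图上的分布,我们随后通过 softmax 函数在 K 个意图之间对这些系数进行归一化:
$$\tilde{S}_k(u,i) = \frac{\exp S_k(u,i)}{\sum_{k'=1}^{K} \exp S_{k'}(u,i)}$$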
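基于新计算出的嵌入,更新得分矩阵(即代码中的 A_values += A_iter_values;其中 $\mathbf{u}_k^{t}$ 为本次迭代聚合得到的嵌入,$\mathbf{i}_k^{0}$ 为本层的输入嵌入,二者均先做 L2 归一化):
$$S_k^{t+1}(u,i) = S_k^{t}(u,i) + {\mathbf{u}_k^{t}}^{\top} \tanh\left(\mathbf{i}_k^{0}\right)$$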
至此一次迭代计算就完成了,接下来基于新的邻接矩阵重复上述步骤。当 T 次迭代都结束时,得到用户在当前GCN层的嵌入
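在经过 L 层的传播聚合之后,将不同层得到的嵌入相加得到最终表示(注意:下面的代码实现中用的是 tf.reduce_mean,即对各层嵌入取平均,而不是直接相加):
$$\mathbf{u}_k = \mathbf{u}_k^{(0)} + \mathbf{u}_k^{(1)} + \cdots + \mathbf{u}_k^{(L)}$$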
n_users=4874, n_items=2406
n_train=37027, n_test=15930, sparsity=0.00452
************************* Run with following settings ***************************
Namespace(Ks='[20, 40, 60, 80, 100]', batch_size=1024, corDecay=0.01, cor_flag=1, data_path='../Data/', dataset='office', early=10, embed_name='', embed_size=128, epoch=2000, gpu='2', layer_size='[128]', lr=0.001, n_factors=4, n_iterations=2, n_layers=1, pick=0, pick_scale=10000000000.0, pretrain=0, proj_path='', regs='[1e-3,1e-4,1e-4]', save_flag=0, save_name='best_model', show_step=3, test_flag='part', verbose=1)
already load adj matrix (7280, 7280) 0.0299530029296875
# Load the four adjacency-matrix variants produced by the data generator.
plain_adj, norm_adj, mean_adj, pre_adj = data_generator.get_adj_mat()
# Extract the (row, col, value) triplets of the adjacency matrix.
all_h_list, all_t_list, all_v_list = load_adjacency_list_data(plain_adj)
# Initial per-factor value for every u-i interaction; the original note gives
# the shape as 4*74054 (n_factors x number of adjacency entries) for the logged run.
A_values_init = create_initial_A_values(args.n_factors, all_v_list)
# NOTE(review): the value stored under the key 'norm_adj' is plain_adj, not
# norm_adj -- confirm this is intentional.
config['norm_adj'] = plain_adj
# Row ("head") and column ("tail") indices of the adjacency entries, read by the model.
config['all_h_list'] = all_h_list
config['all_t_list'] = all_t_list
def _create_star_routing_embed_with_P(self, pick_=False):
    """Propagate the ID embeddings through the factor-wise routing layers.

    pick_ : True, the model would narrow the weight of the least important
            factor down to 1/args.pick_scale.
    pick_ : False, do nothing.

    Two embedding streams are built side by side. The plain stream always
    uses the un-picked adjacency factors; the ``_t`` stream uses ``pick_``
    from the last routing iteration onwards (presumably the test-time
    variant -- confirm against the caller).

    Returns a 5-tuple:
        u_g_embeddings, i_g_embeddings      -- plain stream, split into users / items
        output_factors_distribution         -- per layer, the list of adjacency
                                               factors used in the last iteration
        u_g_embeddings_t, i_g_embeddings_t  -- ``_t`` stream, split into users / items
    """
    p_test = False
    p_train = False

    # load the initial all-one adjacency values
    # .... A_values is an all-ones dense tensor of size [n_factors, len(all_h_list)]
    # .... ([4, 74054] in the logged run).
    A_values = tf.ones(shape=[self.n_factors, len(self.all_h_list)])

    # get the ID embeddings of users and items
    # .... ego_embeddings is a dense tensor of size [n_users+n_items, embed_size]
    # .... ([7280, 128] in the logged run);
    # .... all_embeddings collects the input plus the output of every layer.
    ego_embeddings = tf.concat(
        [self.weights['user_embedding'], self.weights['item_embedding']], axis=0)
    all_embeddings = [ego_embeddings]
    all_embeddings_t = [ego_embeddings]

    output_factors_distribution = []

    # One entry per layer. The original hard-coded three entries, which
    # raised IndexError as soon as n_layers exceeded 3.
    factor_num = [self.n_factors] * self.n_layers
    iter_num = [self.n_iterations] * self.n_layers

    for k in range(self.n_layers):
        n_factors_l = factor_num[k]
        n_iterations_l = iter_num[k]

        # layer_embeddings holds the (n_factors)-len list of outputs of the
        # last routing iteration of this layer.
        layer_embeddings = []
        layer_embeddings_t = []

        # split the input embedding table
        # .... (n_factors)-len list of [n_users+n_items, embed_size/n_factors] tensors.
        # Both streams start each layer from the plain ego_embeddings.
        ego_layer_embeddings = tf.split(ego_embeddings, n_factors_l, 1)
        ego_layer_embeddings_t = tf.split(ego_embeddings, n_factors_l, 1)

        # perform routing mechanism
        for t in range(n_iterations_l):
            is_last_iteration = t == n_iterations_l - 1

            iter_embeddings = []
            iter_embeddings_t = []
            A_iter_values = []

            if is_last_iteration:
                # NOTE(review): p_test is never reset, so from the second
                # layer on it is already equal to pick_ in every iteration.
                p_test = pick_
                p_train = False

            # turn the adjacency values into per-factor sparse matrices
            # .... A_factors:     (n_factors)-len list of adjacency matrices
            # .... D_col_factors: (n_factors)-len list of diagonal degree matrices
            # .... D_row_factors: returned as well but not used below
            A_factors, D_col_factors, D_row_factors = \
                self._convert_A_values_to_A_factors_with_P(n_factors_l, A_values, pick=p_train)
            A_factors_t, D_col_factors_t, D_row_factors_t = \
                self._convert_A_values_to_A_factors_with_P(n_factors_l, A_values, pick=p_test)

            for i in range(n_factors_l):
                # simplified graph convolution: D_col[i] * A[i] * D_col[i] * E[i]
                # .... factor_embeddings is [n_users+n_items, embed_size/n_factors]
                factor_embeddings = tf.sparse_tensor_dense_matmul(
                    D_col_factors[i], ego_layer_embeddings[i])
                factor_embeddings_t = tf.sparse_tensor_dense_matmul(
                    D_col_factors_t[i], ego_layer_embeddings_t[i])

                factor_embeddings_t = tf.sparse_tensor_dense_matmul(
                    A_factors_t[i], factor_embeddings_t)
                factor_embeddings = tf.sparse_tensor_dense_matmul(
                    A_factors[i], factor_embeddings)

                factor_embeddings = tf.sparse_tensor_dense_matmul(
                    D_col_factors[i], factor_embeddings)
                factor_embeddings_t = tf.sparse_tensor_dense_matmul(
                    D_col_factors_t[i], factor_embeddings_t)

                # Collect the per-factor outputs. These appends were missing
                # from the excerpt, which left the lists empty.
                iter_embeddings.append(factor_embeddings)
                iter_embeddings_t.append(factor_embeddings_t)

                # get the factor-wise embeddings of both ends of every edge
                # .... head: aggregated embedding of the edge's row node
                # .... tail: layer-input embedding of the edge's column node
                # .... both are [len(all_h_list), embed_size/n_factors]
                head_factor_embedings = tf.nn.embedding_lookup(
                    factor_embeddings, self.all_h_list)
                tail_factor_embedings = tf.nn.embedding_lookup(
                    ego_layer_embeddings[i], self.all_t_list)

                # constrain the vector length so the attentive weights stay bounded
                head_factor_embedings = tf.nn.l2_normalize(head_factor_embedings, axis=1)
                tail_factor_embedings = tf.nn.l2_normalize(tail_factor_embedings, axis=1)

                # attentive weight of every edge: <head, tanh(tail)>
                # .... A_factor_values is [len(all_h_list)]
                # .... (the original note labels this step Eq. 11)
                A_factor_values = tf.reduce_sum(
                    tf.multiply(head_factor_embedings, tf.tanh(tail_factor_embedings)),
                    axis=1)

                # Missing from the excerpt as well: without it the tf.stack
                # below receives an empty list.
                A_iter_values.append(A_factor_values)

            # pack (n_factors) adjacency values into one [n_factors, len(all_h_list)] tensor
            A_iter_values = tf.stack(A_iter_values, 0)
            # accumulate the attentive weights across routing iterations
            A_values += A_iter_values

            if is_last_iteration:
                # keep the outputs and the adjacency factors of the final iteration
                layer_embeddings = iter_embeddings
                layer_embeddings_t = iter_embeddings_t
                output_factors_distribution.append(A_factors)

        # concatenate the factor chunks back to [n_users+n_items, embed_size]
        side_embeddings = tf.concat(layer_embeddings, 1)
        side_embeddings_t = tf.concat(layer_embeddings_t, 1)

        ego_embeddings = side_embeddings
        ego_embeddings_t = side_embeddings_t

        # collect the outputs of all layers
        all_embeddings_t += [ego_embeddings_t]
        all_embeddings += [ego_embeddings]

    # average over the layer axis (input embedding included)
    all_embeddings = tf.stack(all_embeddings, 1)
    all_embeddings = tf.reduce_mean(all_embeddings, axis=1, keepdims=False)
    all_embeddings_t = tf.stack(all_embeddings_t, 1)
    # keepdims (not the deprecated keep_dims) for consistency with the line above
    all_embeddings_t = tf.reduce_mean(all_embeddings_t, axis=1, keepdims=False)

    u_g_embeddings, i_g_embeddings = tf.split(
        all_embeddings, [self.n_users, self.n_items], 0)
    u_g_embeddings_t, i_g_embeddings_t = tf.split(
        all_embeddings_t, [self.n_users, self.n_items], 0)

    return u_g_embeddings, i_g_embeddings, output_factors_distribution, \
        u_g_embeddings_t, i_g_embeddings_t
def _convert_A_values_to_A_factors_with_P(self, f_num, A_factor_values, pick=True):
    """Turn per-factor adjacency values into sparse adjacency / degree tensors.

    Args:
        f_num: number of factors to materialise.
        A_factor_values: tensor of shape [n_factors, len(all_h_list)] holding
            the un-normalised score of every adjacency entry for every factor.
        pick: when True, every score except the per-edge minimum is multiplied
            by ``self.pick_level`` before renormalising, which shrinks the
            relative weight of the weakest factor by 1/self.pick_level.

    Returns:
        (A_factors, D_col_factors, D_row_factors): three (f_num)-len lists of
        tf.SparseTensor with dense shape ``self.A_in_shape``.
    """
    A_factors = []
    D_col_factors = []
    D_row_factors = []

    # indices of the adjacency entries, [len(all_h_list), 2]
    A_indices = np.mat([self.all_h_list, self.all_t_list]).transpose()
    # diagonal indices, [n_users+n_items, 2]
    n_nodes = self.n_users + self.n_items
    D_indices = np.mat([list(range(n_nodes)), list(range(n_nodes))]).transpose()

    # factor-aware softmax over the values of the adjacency matrix
    # .... A_factor_scores is [n_factors, len(all_h_list)]
    A_factor_scores = tf.nn.softmax(A_factor_values, 0)
    if pick:
        # The excerpt recomputed the plain softmax unconditionally after this
        # branch, silently discarding the re-weighting; it is applied only
        # here now, so pick=True takes effect.
        min_A = tf.reduce_min(A_factor_scores, 0)
        above_min = A_factor_scores > (min_A + 0.0000001)
        # x pick_level for every non-minimal factor, x 1 for the minimal one
        scale = tf.cast(above_min, tf.float32) * (self.pick_level - 1.0) + 1.0

        A_factor_scores = A_factor_scores * scale
        A_factor_scores = A_factor_scores / tf.reduce_sum(A_factor_scores, 0)

    for i in range(f_num):
        # couple the i-th factor's scores with the adjacency indices
        # .... A_i_tensor is sparse with dense shape self.A_in_shape
        A_i_scores = A_factor_scores[i]
        A_i_tensor = tf.SparseTensor(A_indices, A_i_scores, self.A_in_shape)

        # inverse square roots of the score sums along axis 1 and axis 0;
        # the 1e-10 guards against division by zero for empty rows / columns
        D_i_col_scores = 1 / tf.sqrt(tf.sparse_reduce_sum(A_i_tensor, axis=1) + 1e-10)
        D_i_row_scores = 1 / tf.sqrt(tf.sparse_reduce_sum(A_i_tensor, axis=0) + 1e-10)

        # place the degree values on the diagonal of a sparse matrix
        D_i_col_tensor = tf.SparseTensor(D_indices, D_i_col_scores, self.A_in_shape)
        D_i_row_tensor = tf.SparseTensor(D_indices, D_i_row_scores, self.A_in_shape)

        # These appends were missing from the excerpt, so the function
        # returned three empty lists.
        A_factors.append(A_i_tensor)
        D_col_factors.append(D_i_col_tensor)
        D_row_factors.append(D_i_row_tensor)

    # (n_factors)-length lists of adjacency and degree matrices
    return A_factors, D_col_factors, D_row_factors
# Excerpt repeated from the routing loop of _create_star_routing_embed_with_P:
# the same adjacency values are converted twice, once with pick=p_train and
# once with pick=p_test.
A_factors, D_col_factors, D_row_factors = self._convert_A_values_to_A_factors_with_P(n_factors_l, A_values, pick= p_train)
A_factors_t, D_col_factors_t, D_row_factors_t = self._convert_A_values_to_A_factors_with_P(n_factors_l, A_values, pick= p_test)
# create models
# .... ua/ia_embeddings are the plain outputs, ua/ia_embeddings_t the pick_-aware
# .... ones, and f_weight is the returned list of adjacency factors.
self.ua_embeddings, self.ia_embeddings, self.f_weight, self.ua_embeddings_t, self.ia_embeddings_t = self._create_star_routing_embed_with_P(pick_=self.is_pick)

# Establish the final representations for user-item pairs in batch.
# (This sentence was a bare docstring line in the excerpt, i.e. a syntax error.)
self.u_g_embeddings = tf.nn.embedding_lookup(self.ua_embeddings, self.users)
self.u_g_embeddings_t = tf.nn.embedding_lookup(self.ua_embeddings_t, self.users)
self.pos_i_g_embeddings = tf.nn.embedding_lookup(self.ia_embeddings, self.pos_items)
self.pos_i_g_embeddings_t = tf.nn.embedding_lookup(self.ia_embeddings_t, self.pos_items)
self.neg_i_g_embeddings = tf.nn.embedding_lookup(self.ia_embeddings, self.neg_items)

# Raw ID embeddings of the same batch, looked up in the weight tables directly.
self.u_g_embeddings_pre = tf.nn.embedding_lookup(self.weights['user_embedding'], self.users)
self.pos_i_g_embeddings_pre = tf.nn.embedding_lookup(self.weights['item_embedding'], self.pos_items)
self.neg_i_g_embeddings_pre = tf.nn.embedding_lookup(self.weights['item_embedding'], self.neg_items)

# Embeddings of the users / items indexed by cor_users / cor_items.
self.cor_u_g_embeddings = tf.nn.embedding_lookup(self.ua_embeddings, self.cor_users)
self.cor_i_g_embeddings = tf.nn.embedding_lookup(self.ia_embeddings, self.cor_items)
def create_cor_loss(self, cor_u_embeddings, cor_i_embeddings):
    """Return the correlation loss between adjacent factor chunks.

    Args:
        cor_u_embeddings: user embeddings, one row per sampled user.
        cor_i_embeddings: item embeddings, one row per sampled item.

    Returns:
        A scalar float32 tensor; constant 0.0 when ``self.cor_flag == 0``.
    """
    cor_loss = tf.constant(0.0, tf.float32)

    if self.cor_flag == 0:
        return cor_loss

    # stack users on top of items, then cut the embedding axis into
    # n_factors equally sized chunks
    ui_embeddings = tf.concat([cor_u_embeddings, cor_i_embeddings], axis=0)
    ui_factor_embeddings = tf.split(ui_embeddings, self.n_factors, 1)

    # only neighbouring chunks (i, i+1) are compared
    for i in range(self.n_factors - 1):
        x = ui_factor_embeddings[i]
        y = ui_factor_embeddings[i + 1]
        cor_loss += self._create_distance_correlation(x, y)

    # NOTE(review): the sum has n_factors-1 terms but is divided by
    # (n_factors+1)*n_factors/2; left unchanged so the loss scale is unchanged.
    cor_loss /= ((self.n_factors + 1.0) * self.n_factors / 2)

    return cor_loss
def _create_distance_correlation(self, X1, X2):
    """Return the distance correlation between the row samples of X1 and X2.

    Args:
        X1, X2: 2-D tensors with the same number of rows (one sample per row).

    Returns:
        A scalar tensor: dcov(X1, X2) / sqrt(dcov(X1, X1) * dcov(X2, X2)).

    NOTE(review): the excerpt ended after the two helper definitions, so the
    method returned None. The tail below is the standard distance-correlation
    ratio -- confirm it matches the intended implementation.
    """

    def _create_centered_distance(X):
        """Return the double-centred pairwise distance matrix of the rows of X.

        The result is [n_rows, n_rows]; as the original author remarked, that
        is a HUGE matrix when computed for ~70000 rows (70000*70000*4 bytes).
        """
        # pairwise Euclidean distance: sqrt(|x|^2 - 2<x, y> + |y|^2)
        # .... clamped at 0 and offset by 1e-8 to keep the sqrt gradient finite
        r = tf.reduce_sum(tf.square(X), 1, keepdims=True)
        D = tf.sqrt(
            tf.maximum(r - 2 * tf.matmul(a=X, b=X, transpose_b=True) + tf.transpose(r), 0.0)
            + 1e-8)

        # double centring: subtract the column and row means, add the grand mean
        D = D - tf.reduce_mean(D, axis=0, keepdims=True) \
            - tf.reduce_mean(D, axis=1, keepdims=True) \
            + tf.reduce_mean(D)
        return D

    def _create_distance_covariance(D1, D2):
        """Return the distance covariance of two centred distance matrices."""
        n_samples = tf.cast(tf.shape(D1)[0], tf.float32)
        dcov = tf.sqrt(
            tf.maximum(tf.reduce_sum(D1 * D2) / (n_samples * n_samples), 0.0) + 1e-8)
        return dcov

    D1 = _create_centered_distance(X1)
    D2 = _create_centered_distance(X2)

    dcov_12 = _create_distance_covariance(D1, D2)
    dcov_11 = _create_distance_covariance(D1, D1)
    dcov_22 = _create_distance_covariance(D2, D2)

    # distance correlation; the 1e-10 avoids division by zero
    dcor = dcov_12 / (tf.sqrt(tf.maximum(dcov_11 * dcov_22, 0.0)) + 1e-10)
    return dcor
# Total training objective: the sum of mf_loss, emb_loss and cor_loss.
self.loss = self.mf_loss + self.emb_loss + self.cor_loss