推荐算法_隐语义-梯度下降

import numpy as np

1.模型实现

"""
input
rate_matrix: M行N列 的评分矩阵,值为P*Q.
             P: 初始化用户特征矩阵M*K.
             Q: 初始化物品特征矩阵K*N.
latent_feature_cnt: 隐特征的向量个数
max_iteration:最大迭代次数
alpha:步长
lamda:正则化系数

output
分解之后的P和Q
"""
def LFM_grad_desc(rate_matrix, latent_feature_cnt = 5, max_iteration=3000, alpha=0.0002, lamda=0.004):
    row_count = len(rate_matrix)
    collumn_count = len(rate_matrix[0])
    
    P = np.random.rand(row_count, latent_feature_cnt)
    Q = np.random.rand(collumn_count, latent_feature_cnt)
    Q_T = Q.T
    
    for step in range(max_iteration):
        for i in range(row_count):
            for j in range(collumn_count):
                #有评分才计算误差
                if rate_matrix[i][j] > 0:
                    eij = rate_matrix[i][j] - np.dot(P[i, :], Q_T[:, j])
                    for k in range(latent_feature_cnt):
                        P[i][k] = P[i][k] + alpha * (2 * eij * Q_T[k][j] - 2 * lamda * P[i][k])
                        Q_T[k][j] = Q_T[k][j] + alpha * (2 * eij * P[i][k] - 2 * lamda * Q_T[k][j])
        # 根据更新后的P和Q计算预测评分矩阵                
        new_rate_matrix = np.dot(P, Q_T)
        # 计算损失函数
        e = 0
        for i in range(row_count):
            for j in range(collumn_count):
                if rate_matrix[i][j] > 0:
                    e += (rate_matrix[i][j] - np.dot(P[i, :], Q_T[:, j])) ** 2
                    for k in range(latent_feature_cnt):
                        e += lamda * (P[i][k] ** 2 + Q_T[k][j] ** 2)
        
        if e < 0.001:
            break
    return P, Q_T.T

2.测试

# Demo: factorize a small 4 x 5 rating matrix (0 means "not rated") and
# print the learned factors together with the reconstructed ratings.
rate_matrix = np.array([
    [1, 0, 1, 0, 0],
    [0, 1, 0, 0, 1],
    [1, 0, 0, 1, 0],
    [0, 0, 1, 0, 0],
])
p_matrix, q_matrix = LFM_grad_desc(rate_matrix)
# NOTE: despite its name, q_matrix_t is the product of p_matrix and the
# transpose of q_matrix, i.e. the predicted rating matrix.
q_matrix_t = np.dot(p_matrix, q_matrix.T)
print("rate_matrix: ", rate_matrix)
print("p_matrix ", p_matrix)
print("q_matrix ", q_matrix)
print("q_matrix_transposition ", q_matrix_t)
rate_matrix:  [[1 0 1 0 0]
 [0 1 0 0 1]
 [1 0 0 1 0]
 [0 0 1 0 0]]
p_matrix  [[ 0.39838262  0.21615667  0.59681879  0.12563722  0.31467516]
 [ 0.03343738  0.07072222  0.06800504  0.93232861  0.95902427]
 [ 0.23532823  0.96988942  0.08895633  0.36226683  0.75936132]
 [ 0.34066689 -0.10585512  0.5652792   0.5333705   0.57071132]]
q_matrix  [[0.68274874 0.36148227 0.61386909 0.21725792 0.50210512]
 [0.84890166 0.85940009 0.96291038 0.56227718 0.3236845 ]
 [0.88892504 0.82215255 0.44224926 0.2680801  0.6893379 ]
 [0.06904437 0.19699401 0.13920368 0.61362152 0.70375425]
 [0.4080125  0.80941323 0.64614664 0.4080542  0.53613858]]
q_matrix_transposition  [[0.90179633 1.27113415 1.04638706 0.45171469 0.9431139 ]
 [0.77422712 0.98929489 1.02897348 1.27272137 1.00943811]
 [1.02585997 1.56843842 1.66650015 0.97639202 1.49348442]
 [0.94376861 1.22716647 1.00219153 0.81028538 0.9421935 ]]

你可能感兴趣的:(算法,机器学习和数学,推荐算法,机器学习,隐语义)