There is plenty of material on the FunkSVD algorithm online, but I still felt I should implement it myself.
The theoretical derivation is explained very clearly in https://www.cnblogs.com/pinard/p/6351319.html; http://www.cnblogs.com/shenxiaolin/p/8637794.html is also worth a read.
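For quick reference, the loss being minimized and the update rules the code below uses are the standard FunkSVD ones (matching the derivation in the articles above). With $e_{ij} = r_{ij} - p_i^{\top} q_j$ and $\Omega$ the set of observed entries:

$$J(P,Q) = \frac{1}{2}\sum_{(i,j)\in\Omega}\left(r_{ij} - p_i^{\top} q_j\right)^2 + \frac{\lambda}{2}\left(\sum_i \lVert p_i \rVert^2 + \sum_j \lVert q_j \rVert^2\right)$$

$$p_i \leftarrow p_i - \eta\left(-e_{ij}\,q_j + \lambda\,p_i\right), \qquad q_j \leftarrow q_j - \eta\left(-e_{ij}\,p_i + \lambda\,q_j\right)$$

where $\eta$ is the learning rate (LR in the code) and $\lambda$ is the regularization coefficient (LAMBDA).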
Note that kt is a hyperparameter and needs to be tuned; it can be chosen via cross-validation or by monitoring the loss (see the tuning sketch after the code below).
import numpy as np
import matplotlib.pyplot as plt

# rating matrix; None marks missing entries
R = [[5,    3,    None, 1],
     [4,    None, None, 1],
     [1,    2,    None, 5],
     [1,    None, None, 4],
     [None, 1,    5,    4]]

# learning rate
LR = 1e-2
# regularization coefficient
LAMBDA = 1e-2
# number of iterations
EPOCH = 2000

def calc_error(R, P, Q, n_value):
    # residuals e_ij = r_ij - p_i . q_j over all observed entries
    row, col = np.nonzero(R != n_value)
    e = R[row, col] - np.sum(P[row] * Q.T[col], axis=1)
    return e

def funksvd(R, n_value, kt):
    '''
    :param R: input matrix
    :param n_value: the value that marks missing entries
    :param kt: dimension of the factor matrices
    '''
    if not isinstance(R, np.ndarray):
        R = np.array(R)
    h, w = R.shape
    # indices of all observed entries; only these drive the updates
    row, col = np.nonzero(R != n_value)
    # initialize P (h x kt) and Q (kt x w)
    P = np.random.randn(h, kt)
    Q = np.random.randn(kt, w)
    # gradient-descent optimization
    error_list = []
    for step in range(EPOCH):
        # residuals are recomputed once per epoch, then used for all updates
        E = calc_error(R, P, Q, n_value)
        for p in range(len(row)):
            e = E[p]
            i, j = row[p], col[p]
            # every observed (i, j) position takes part in the gradient step;
            # element-wise equivalent of the vectorized update below:
            # for k in range(kt):
            #     p_gd = -e * Q[k, j] + LAMBDA * P[i, k]
            #     P[i, k] -= LR * p_gd
            #     q_gd = -e * P[i, k] + LAMBDA * Q[k, j]
            #     Q[k, j] -= LR * q_gd
            p_gd = -e * Q[:, j] + LAMBDA * P[i]
            P[i] -= LR * p_gd
            q_gd = -e * P[i] + LAMBDA * Q[:, j]
            Q[:, j] -= LR * q_gd
        E_square = np.sum(np.square(E))
        print('step:', step, 'error square:', E_square)
        error_list.append(E_square)
    return P, Q, error_list

P, Q, error_list = funksvd(R, n_value=None, kt=2)
print(R)
print(P.dot(Q))
plt.plot(error_list)
plt.show()
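As mentioned above, kt has to be tuned. Here is a minimal sketch of one way to do that, assuming the funksvd() defined above: hide a few observed ratings, train on the rest, and compare the reconstruction against the hidden entries. The seed, the number of hidden ratings, and the candidate kt values are all illustrative, not from the original post.

import numpy as np

# hold-out split over the observed entries (illustrative)
R_full = np.array([[5,    3,    None, 1],
                   [4,    None, None, 1],
                   [1,    2,    None, 5],
                   [1,    None, None, 4],
                   [None, 1,    5,    4]], dtype=object)

rng = np.random.default_rng(0)
row, col = np.nonzero(R_full != None)                 # observed positions
held = rng.choice(len(row), size=3, replace=False)    # hold out 3 ratings

R_train = R_full.copy()
for p in held:
    R_train[row[p], col[p]] = None                    # hide held-out entries

# train with each candidate kt and compare held-out reconstruction error
for kt in (1, 2, 3):
    P, Q, _ = funksvd(R_train, n_value=None, kt=kt)
    R_hat = P.dot(Q)
    mse = np.mean([(R_full[row[p], col[p]] - R_hat[row[p], col[p]]) ** 2
                   for p in held])
    print('kt =', kt, 'held-out MSE =', mse)

With a matrix this small the held-out set is tiny, so in practice you would average over several random splits (or do proper cross-validation) before settling on kt.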