推荐系统在现在的互联网应用中很常见,比如,亚马逊会向你推荐图书,豆瓣会向你推荐书评、影评。那么如何用R语言实现推荐模型?
算法步骤:
1). 建立数据模型
2). 欧氏距离相似度算法
3). 最近邻算法
4). 推荐算法
5). 运行程序
我们选用一组比较简单的数据集testCF.csv
1,101,5
1,102,3
1,103,2.5
2,101,2
2,102,2.5
2,103,5
2,104,2
3,101,2.5
3,104,4
3,105,4.5
3,107,5
4,101,5
4,103,3
4,104,4.5
4,106,4
5,101,4
5,102,3
5,103,2
5,104,4
5,105,3.5
5,106,4
# Build the data model.
#
# Reads a header-less CSV file whose three columns are user id, item id and
# preference (rating), and returns a dense user x item rating matrix.
#
# Args:
#   file: path to the CSV file (passed to read.csv with header = FALSE).
#
# Returns:
#   A numeric matrix with one row per distinct user (in order of first
#   appearance in the file) and one column per distinct item (sorted by item
#   id). Column names are the item ids; cells without a rating are 0.
FileDataModel <- function(file) {
  data <- read.csv(file, header = FALSE)
  names(data) <- c("uid", "iid", "pref")

  user <- unique(data$uid)
  item <- unique(sort(data$iid))

  # Row / column position of every rating in the result matrix.
  uidx <- match(data$uid, user)
  iidx <- match(data$iid, item)

  M <- matrix(0, length(user), length(item))
  # Two-column index matrix: fills all (row, col) cells in one assignment.
  # If a (user, item) pair occurs more than once, the last rating wins.
  M[cbind(uidx, iidx)] <- data$pref

  # colnames<- builds the dimnames list itself, so this also works when there
  # is only one distinct item (dimnames(M)[[2]] <- item fails in that case).
  colnames(M) <- as.character(item)
  M
}
# Euclidean-distance similarity.
#
# Computes a symmetric user x user similarity matrix from the rating matrix.
# For every pair of users only the items rated by both (non-zero in both rows)
# are compared.
#
# Args:
#   M: numeric user x item rating matrix, 0 meaning "not rated".
#
# Returns:
#   A square numeric matrix with nrow(M) rows and columns. Entry [a, b] is
#   (number of co-rated items) / (1 + Euclidean distance over those items),
#   clamped to [-1, 1]. The diagonal is 0.
EuclideanDistanceSimilarity <- function(M) {
  row <- nrow(M)
  s <- matrix(0, row, row)
  for (z1 in seq_len(row)) {
    for (z2 in seq_len(row)) {
      # Each unordered pair is computed once and stored in the lower triangle.
      if (z1 < z2) {
        # Columns (items) rated by both users.
        num <- intersect(which(M[z1, ] != 0), which(M[z2, ] != 0))
        sq_dist <- sum((M[z1, num] - M[z2, num])^2)

        s[z2, z1] <- length(num) / (1 + sqrt(sq_dist))

        if (s[z2, z1] > 1) s[z2, z1] <- 1 # normalise
        if (s[z2, z1] < -1) s[z2, z1] <- -1 # normalise
      }
    }
  }
  # Mirror the lower triangle into the upper triangle.
  ts <- t(s)
  w <- which(upper.tri(ts))
  s[w] <- ts[w]
  s
}
# Nearest-N user neighbourhood.
#
# For each user, picks the n users with the largest similarity values.
#
# Args:
#   S: square user x user similarity matrix.
#   n: number of neighbours to select per user.
#
# Returns:
#   A matrix with nrow(S) rows and n columns; row u holds the row indices of
#   the selected neighbours of user u, best first.
NearestNUserNeighborhood <- function(S, n) {
  user_count <- nrow(S)
  neighbor <- matrix(0, user_count, n)
  for (u in 1:user_count) {
    # Work on a copy of this user's similarity column.
    scores <- S[, u]
    for (k in 1:n) {
      best <- which.max(scores)
      neighbor[u, k] <- best
      # Zero out the pick so the next round finds the next-largest value.
      scores[best] <- 0
    }
  }
  neighbor
}
# User-based recommender.
#
# Scores every item the target user has not rated (0 in M) but at least one of
# the user's neighbours has, using a similarity-weighted average of the
# neighbours' ratings, and returns the top-scoring items.
#
# Args:
#   uid: row index of the target user in M, S and N.
#   n:   maximum number of recommendations to return.
#   M:   user x item rating matrix (0 = not rated) with item ids as column
#        names.
#   S:   user x user similarity matrix.
#   N:   neighbour matrix; row uid holds the row indices of uid's neighbours.
#
# Returns:
#   An n x 2 matrix: column 1 is the item id, column 2 the predicted score.
#   Unused rows stay 0. As soon as one recommendation is written the matrix
#   becomes character (the item ids are strings); with no recommendation it
#   stays numeric.
UserBasedRecommender <- function(uid, n, M, S, N) {
  row <- ncol(N)
  col <- ncol(M)
  N1 <- N[uid, ]

  # r[k, j]: neighbour k's rating of item j weighted by its similarity to uid,
  # only for items uid has not rated yet.
  r <- matrix(0, row, col)
  unrated <- which(M[uid, ] == 0)
  for (z1 in seq_along(N1)) {
    nb <- N1[z1]
    num <- intersect(unrated, which(M[nb, ] != 0)) # scorable columns
    r[z1, num] <- M[nb, num] * S[uid, nb]
  }
  score_sum <- colSums(r)
  scored <- which(score_sum != 0)

  # Per item: total similarity of, and number of, the neighbours who rated it.
  weight_sum <- numeric(col)
  votes <- numeric(col)
  for (nb in N1) {
    num <- intersect(scored, which(M[nb, ] != 0))
    weight_sum[num] <- weight_sum[num] + S[uid, nb]
    votes[num] <- votes[num] + 1
  }
  # Items backed by a single neighbour get a huge divisor, which pushes their
  # score below the 0.5 cut-off used further down.
  weight_sum[votes == 1] <- 10000

  rr <- score_sum / weight_sum # NaN (0 / 0) for items that were not scored
  item <- dimnames(M)[[2]]
  r2 <- matrix(0, n, 2)
  for (z1 in seq_len(n)) {
    w <- which.max(rr)
    # which.max() returns integer(0) when every score is NaN, i.e. nothing can
    # be recommended. Once the best remaining score is not above 0.5 no later
    # one can be either, so stop in both cases.
    if (length(w) == 0L || !(rr[w] > 0.5)) break
    r2[z1, 1] <- item[w]
    r2[z1, 2] <- as.double(rr[w])
    rr[w] <- 0
  }
  r2
}
# Main program: build the model, then print the recommendations for users 1-5.
FILE <- "F:/testCF.csv"
NEIGHBORHOOD_NUM <- 2
RECOMMENDER_NUM <- 3

# Rating matrix -> user similarities -> nearest neighbours of every user.
M <- FileDataModel(FILE)
S <- EuclideanDistanceSimilarity(M)
N <- NearestNUserNeighborhood(S, NEIGHBORHOOD_NUM)

# One recommendation table per user; the bare name on the following line
# prints the table when the script is run at top level.
R1 <- UserBasedRecommender(1, RECOMMENDER_NUM, M, S, N)
R1
R2 <- UserBasedRecommender(2, RECOMMENDER_NUM, M, S, N)
R2
R3 <- UserBasedRecommender(3, RECOMMENDER_NUM, M, S, N)
R3
R4 <- UserBasedRecommender(4, RECOMMENDER_NUM, M, S, N)
R4
R5 <- UserBasedRecommender(5, RECOMMENDER_NUM, M, S, N)
R5
运行结果:
> R1<-UserBasedRecommender(1,RECOMMENDER_NUM,M,S,N);R1
[,1] [,2]
[1,] "104" "4.25"
[2,] "106" "4"
[3,] "0" "0"
> R2<-UserBasedRecommender(2,RECOMMENDER_NUM,M,S,N);R2
[,1] [,2]
[1,] "105" "3.95699903407931"
[2,] "0" "0"
[3,] "0" "0"
> R3<-UserBasedRecommender(3,RECOMMENDER_NUM,M,S,N);R3
[,1] [,2]
[1,] "103" "3.18540697329411"
[2,] "102" "2.80243217111765"
[3,] "0" "0"
> R4<-UserBasedRecommender(4,RECOMMENDER_NUM,M,S,N);R4
[,1] [,2]
[1,] "102" "3"
[2,] "0" "0"
[3,] "0" "0"
> R5<-UserBasedRecommender(5,RECOMMENDER_NUM,M,S,N);R5
[,1] [,2]
[1,] 0 0
[2,] 0 0
[3,] 0 0
>
对用户1,推荐计算得分最高的两个物品,104和106
对用户2,推荐计算得分最高的1个物品,105
对用户3,推荐计算得分最高的2个物品,103和102
对用户4,推荐计算得分最高的1个物品,102
对用户5,没有推荐
参考链接:
http://blog.fens.me/series-r/