【R 推荐系统】基于用户的协同过滤推荐算法(UserCF)

推荐系统在现在的互联网应用中很常见,比如,亚马逊会向你推荐图书,豆瓣会向你推荐书评、影评。那么如何用R语言实现推荐模型?
算法步骤:
1). 建立数据模型
2). 欧氏距离相似度算法
3). 最近邻算法
4). 推荐算法
5). 运行程序

我们选用一组比较简单的数据集testCF.csv(每行三列,依次为用户ID、物品ID、评分;由于程序使用read.csv读取,实际文件中各列需以逗号分隔):

1	101	5
1	102	3
1	103	2.5
2	101	2
2	102	2.5
2	103	5
2	104	2
3	101	2.5
3	104	4
3	105	4.5
3	107	5
4	101	5
4	103	3
4	104	4.5
4	106	4
5	101	4
5	102	3
5	103	2
5	104	4
5	105	3.5
5	106	4


# Build the data model
#
# Reads a headerless CSV whose first three columns are user id, item id and
# preference, and returns a dense user x item preference matrix.
#
# Args:
#   file: passed unchanged to read.csv(file, header = FALSE).
#
# Returns:
#   A numeric matrix with one row per distinct user (in order of first
#   appearance in the file) and one column per distinct item (sorted
#   ascending). Column names are the item ids; cells without a rating are 0.
FileDataModel <- function(file) {
  data <- read.csv(file, header = FALSE)
  names(data) <- c("uid", "iid", "pref")

  user <- unique(data$uid)
  item <- sort(unique(data$iid))

  M <- matrix(0, length(user), length(item))
  # A two-column index matrix addresses (row, col) cells directly, so every
  # rating is written in one vectorised assignment instead of a row loop.
  M[cbind(match(data$uid, user), match(data$iid, item))] <- data$pref

  # colnames<- is used instead of dimnames(M)[[2]] <- item: the latter fails
  # when there is exactly one item, because `[[<-` on a NULL dimnames with a
  # length-one value yields an atomic vector rather than a list.
  colnames(M) <- item
  M
}

# Euclidean-distance similarity
#
# Computes a symmetric user x user similarity matrix from the preference
# matrix M (rows = users, columns = items, 0 = not rated).
#
# For each pair of users, only the items both have rated (non-zero in both
# rows) are compared:
#   similarity = n_common / (1 + sqrt(sum((pref_a - pref_b)^2)))
# The value is then clamped to [-1, 1]. Pairs with no common item get 0, and
# the diagonal is left at 0 (a user is not compared with itself).
#
# Args:
#   M: numeric preference matrix.
#
# Returns:
#   A square numeric matrix with nrow(M) rows and columns.
EuclideanDistanceSimilarity <- function(M) {
  n_users <- nrow(M)
  s <- matrix(0, n_users, n_users)
  for (z1 in seq_len(n_users)) {
    for (z2 in seq_len(n_users)) {
      # Each unordered pair is computed once and mirrored below.
      if (z1 < z2) {
        common <- which(M[z1, ] != 0 & M[z2, ] != 0) # items rated by both
        distance <- sqrt(sum((M[z1, common] - M[z2, common])^2))

        # Scaling by the number of common items rewards larger overlaps,
        # which can push the raw value above 1; clamp it back (normalise).
        sim <- length(common) / (1 + distance)
        sim <- max(-1, min(1, sim))

        s[z1, z2] <- sim
        s[z2, z1] <- sim
      }
    }
  }
  s
}

# Nearest-N user neighbourhood
#
# For every user, picks the n other users with the highest similarity.
#
# Args:
#   S: square user x user similarity matrix; column z1 holds the
#      similarities of user z1 to every user.
#   n: number of neighbours to select per user.
#
# Returns:
#   A matrix with nrow(S) rows and n columns. Row i lists the row indices of
#   user i's neighbours in descending order of similarity (ties keep the
#   lower index first). A user is never listed as its own neighbour and no
#   neighbour is repeated; when fewer than n other users exist, the unused
#   trailing cells stay 0.
NearestNUserNeighborhood <- function(S, n) {
  n_users <- nrow(S)
  neighbor <- matrix(0, n_users, n)
  for (z1 in seq_len(n_users)) {
    others <- setdiff(seq_len(n_users), z1)
    # order() on the negated values sorts descending while keeping ties in
    # index order. Ranking the candidates once (instead of repeatedly taking
    # which.max and overwriting the winner with 0) cannot return the same
    # user twice when the remaining similarities are all 0.
    ranked <- others[order(-S[others, z1])]
    top <- ranked[seq_len(min(n, length(ranked)))]
    neighbor[z1, seq_along(top)] <- top
  }
  neighbor
}

# Recommendation algorithm
#
# Scores the items that user `uid` has not rated, using the ratings of the
# user's neighbours weighted by similarity, and returns the top results.
#
# Args:
#   uid: row index of the target user in M, S and N.
#   n:   maximum number of recommendations to return.
#   M:   user x item preference matrix (0 = not rated); column names are
#        used as item labels, falling back to column indices if absent.
#   S:   user x user similarity matrix.
#   N:   neighbourhood matrix; row `uid` lists the neighbour row indices.
#        Entries that are 0 or NA are skipped.
#
# Returns:
#   An n x 2 matrix: column 1 is the item label, column 2 the predicted
#   score, best first. Unused rows hold 0. Because labels are character,
#   the matrix is character as soon as one item is recommended and stays
#   numeric (all 0) when nothing is recommended.
UserBasedRecommender <- function(uid, n, M, S, N) {
  single_voter_penalty <- 10000 # divisor for items backed by one neighbour
  min_score <- 0.5              # scores at or below this are not recommended

  n_items <- ncol(M)
  neighbors <- N[uid, ]
  neighbors <- neighbors[!is.na(neighbors) & neighbors > 0]
  unrated <- M[uid, ] == 0

  # Similarity-weighted rating of every item the target user has not rated,
  # one row per neighbour.
  weighted <- matrix(0, length(neighbors), n_items)
  for (k in seq_along(neighbors)) {
    nb <- neighbors[k]
    cols <- which(unrated & M[nb, ] != 0) # items this neighbour can score
    weighted[k, cols] <- M[nb, cols] * S[uid, nb]
  }
  score_sum <- colSums(weighted)

  # Per item: total similarity and number of neighbours that rated it.
  scored <- score_sum != 0
  sim_sum <- numeric(n_items)
  votes <- numeric(n_items)
  for (nb in neighbors) {
    cols <- which(scored & M[nb, ] != 0)
    sim_sum[cols] <- sim_sum[cols] + S[uid, nb]
    votes[cols] <- votes[cols] + 1
  }

  # An item rated by only one neighbour is divided by a huge constant so it
  # falls below the recommendation threshold.
  sim_sum[votes == 1] <- single_voter_penalty

  # Items nobody scored give 0 / 0 = NaN, which which.max() ignores.
  rr <- score_sum / sim_sum

  item <- dimnames(M)[[2]]
  if (is.null(item)) {
    item <- as.character(seq_len(n_items))
  }

  r2 <- matrix(0, n, 2)
  for (z1 in seq_len(n)) {
    w <- which.max(rr)
    # which.max() returns integer(0) when every score is NaN (for example
    # when the user has already rated every item); stop instead of feeding
    # a zero-length condition to if().
    if (length(w) == 0 || !(rr[w] > min_score)) {
      break
    }
    r2[z1, 1] <- item[w]
    r2[z1, 2] <- as.double(rr[w])
    rr[w] <- 0 # consumed; the next iteration picks the next-best item
  }
  r2
}


# Main program

# Input file and tuning constants
FILE <- "F:/testCF.csv"
NEIGHBORHOOD_NUM <- 2
RECOMMENDER_NUM <- 3

# Pipeline: preference matrix -> user similarities -> neighbourhoods
M <- FileDataModel(FILE)
S <- EuclideanDistanceSimilarity(M)
N <- NearestNUserNeighborhood(S, NEIGHBORHOOD_NUM)

# Recommendations for users 1 to 5; each result is stored and then shown
R1 <- UserBasedRecommender(1, RECOMMENDER_NUM, M, S, N)
R1
R2 <- UserBasedRecommender(2, RECOMMENDER_NUM, M, S, N)
R2
R3 <- UserBasedRecommender(3, RECOMMENDER_NUM, M, S, N)
R3
R4 <- UserBasedRecommender(4, RECOMMENDER_NUM, M, S, N)
R4
R5 <- UserBasedRecommender(5, RECOMMENDER_NUM, M, S, N)
R5

运行结果:

> R1<-UserBasedRecommender(1,RECOMMENDER_NUM,M,S,N);R1
     [,1]  [,2]  
[1,] "104" "4.25"
[2,] "106" "4"   
[3,] "0"   "0"   
> R2<-UserBasedRecommender(2,RECOMMENDER_NUM,M,S,N);R2
     [,1]  [,2]              
[1,] "105" "3.95699903407931"
[2,] "0"   "0"               
[3,] "0"   "0"               
> R3<-UserBasedRecommender(3,RECOMMENDER_NUM,M,S,N);R3
     [,1]  [,2]              
[1,] "103" "3.18540697329411"
[2,] "102" "2.80243217111765"
[3,] "0"   "0"               
> R4<-UserBasedRecommender(4,RECOMMENDER_NUM,M,S,N);R4
     [,1]  [,2]
[1,] "102" "3" 
[2,] "0"   "0" 
[3,] "0"   "0" 
> R5<-UserBasedRecommender(5,RECOMMENDER_NUM,M,S,N);R5
     [,1] [,2]
[1,]    0    0
[2,]    0    0
[3,]    0    0
> 

对用户1,推荐计算得分最高的两个物品,104和106
对用户2,推荐计算得分最高的1个物品,105
对用户3,推荐计算得分最高的2个物品,103和102
对用户4,推荐计算得分最高的1个物品,102
对用户5,没有推荐

参考链接:
http://blog.fens.me/series-r/

你可能感兴趣的:(数据科学--R语言)