#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Sample movie-rating data and similarity scores between critics."""
from math import sqrt

# Collected preferences: critic name -> {movie title -> rating}.
# NOTE: several titles carry a trailing space and misspellings; they are kept
# exactly as-is because they are the lookup keys used at runtime.
critics = {
    "Lisa Rose": {
        "Lady in the Water": 2.5,
        "Snake on a plane ": 3.5,
        "Juse My luck ": 3.0,
        "Superman Returns ": 3.5,
        "You,me and Durpee ": 2.5,
        "The Night Listener": 3.0,
    },
    "Gene Seymour": {
        "Lady in the Water": 3.0,
        "Snake on a plane ": 3.5,
        "Juse My luck ": 1.5,
        "Superman Returns ": 5.0,
        "You,me and Durpee ": 3.5,
        "The Night Listener": 3.0,
    },
    "Michael Phillips": {
        "Lady in the Water": 2.5,
        "Snake on a plane ": 3.0,
        "Superman Returns ": 3.5,
        "The Night Listener": 4.0,
    },
    "Claudia Puig": {
        "Snake on a plane ": 3.5,
        "Juse My luck ": 3.0,
        "Superman Returns ": 4.0,
        "You,me and Durpee ": 2.5,
        "The Night Listener": 4.5,
    },
    "Mick LaSalle": {
        "Lady in the Water": 3.0,
        "Snake on a plane ": 4.0,
        "Juse My luck ": 2.0,
        "Superman Returns ": 3.0,
        "You,me and Durpee ": 2.0,
    },
    "Jack Mattews": {
        "Lady in the Water": 3.0,
        "Snake on a plane ": 4.0,
        "Juse My luck ": 3.0,
        "Superman Returns ": 5.0,
        "You,me and Durpee ": 3.5,
        "The Night Listener": 3.0,
    },
}

# Finding similar users with the Euclidean distance; pow(n, 2) squares n.
# Worked example over two rating pairs, (2.5 vs 3.0) and (3.5 vs 3.5).
# Both differences must be squared: an exponent of 0 would turn the second
# term into 1.0 regardless of the ratings and skew the score.
s = 1 / (1 + sqrt(pow(2.5 - 3.0, 2) + pow(3.5 - 3.5, 2)))
# print(s)
# Concrete similarity implementations: the Euclidean-distance score is
# defined here; the Pearson correlation score is sim_person.
def sim_distance(per, person1, person2):
    """Return a Euclidean-distance based similarity score for two people.

    Args:
        per: Mapping of person name -> {item -> numeric rating}.
        person1: Key of the first person in ``per``.
        person2: Key of the second person in ``per``.

    Returns:
        ``1 / (1 + d)`` where ``d`` is the Euclidean distance between the two
        people's ratings over the items both have rated, so identical ratings
        give 1.0 and larger distances approach 0. Returns 0 when the two
        people share no rated item.

    Raises:
        KeyError: If ``person1`` or ``person2`` is not a key of ``per``.
    """
    # Items rated by both people (iteration follows person2's ordering).
    shared = [item for item in per[person2] if item in per[person1]]

    # Nothing in common: no basis for similarity. An emptiness check is
    # required here; comparing the container against None is never true.
    if not shared:
        return 0

    # Square the difference along every shared axis and add them up; the
    # square root of that sum is the distance.
    sum_of_squares = sum(
        pow(per[person1][item] - per[person2][item], 2) for item in shared
    )
    return 1 / (1 + sqrt(sum_of_squares))


if __name__ == "__main__":
    print(sim_distance(critics, "Lisa Rose", "Lisa Rose"))
    print(sim_distance(critics, "Lisa Rose", "Gene Seymour"))
    print(sim_distance(critics, "Lisa Rose", "Michael Phillips"))
# Finding similar users: Pearson correlation between p1 and p2
# (prefs is the data source).
def sim_person(prefs, p1, p2):
    """Return the Pearson correlation coefficient between two people.

    Only items rated by both people are taken into account.

    Args:
        prefs: Mapping of person name -> {item -> numeric rating}.
        p1: Key of the first person in ``prefs``.
        p2: Key of the second person in ``prefs``.

    Returns:
        The correlation ``r`` of the shared ratings. Returns 0 when the two
        people share no rated item, or when either person's shared ratings
        have no variance (the coefficient is undefined in that case).

    Raises:
        KeyError: If ``p1`` or ``p2`` is not a key of ``prefs``.
    """
    # Items rated by both people.
    shared = [item for item in prefs[p1] if item in prefs[p2]]
    n = len(shared)

    # No overlap means there is no evidence of similarity, so report 0
    # rather than a perfect correlation.
    if n == 0:
        return 0

    ratings1 = [prefs[p1][item] for item in shared]
    ratings2 = [prefs[p2][item] for item in shared]

    # Sums of the ratings.
    sum_p1 = sum(ratings1)
    sum_p2 = sum(ratings2)

    # Sums of the squared ratings.
    pow_p1 = sum(pow(r, 2) for r in ratings1)
    pow_p2 = sum(pow(r, 2) for r in ratings2)

    # Sum of the products of corresponding ratings.
    corresponding_p1p2 = sum(r1 * r2 for r1, r2 in zip(ratings1, ratings2))

    # Pearson score: covariance term over the product of the spread terms.
    num = corresponding_p1p2 - (sum_p1 * sum_p2 / n)
    spread_p1 = pow_p1 - pow(sum_p1, 2) / n
    spread_p2 = pow_p2 - pow(sum_p2, 2) / n

    # A spread of zero makes the coefficient undefined. Floating-point
    # rounding can also push a zero spread slightly negative, and raising a
    # negative product to the power 0.5 would produce a complex number.
    if spread_p1 <= 0 or spread_p2 <= 0:
        return 0

    den = (spread_p1 * spread_p2) ** 0.5
    if den == 0:
        return 0
    return num / den