from math import sqrt
def sim_distance(prefer, person1, person2):
    """Return a Euclidean-distance-based similarity between two people.

    Only items rated by both people are compared. The score is
    ``1 / (1 + d)`` where ``d`` is the Euclidean distance between the two
    rating vectors over those shared items, so identical ratings give 1.0
    and the score shrinks towards 0 as the ratings diverge.

    Args:
        prefer: Mapping of person -> mapping of item -> numeric rating.
        person1: Key of the first person in ``prefer``.
        person2: Key of the second person in ``prefer``.

    Returns:
        0 when the two people share no rated item, otherwise the
        similarity score described above.
    """
    # Items are collected in person1's iteration order.
    shared = [item for item in prefer[person1] if item in prefer[person2]]
    if not shared:
        return 0

    ratings1 = prefer[person1]
    ratings2 = prefer[person2]
    squared_gap = sum((ratings1[item] - ratings2[item]) ** 2 for item in shared)
    return 1 / (1 + sqrt(squared_gap))
def sim_pearson(prefer, person1, person2):
    """Return the Pearson correlation of two people's shared ratings.

    Only items rated by both people take part in the computation.

    Args:
        prefer: Mapping of person -> mapping of item -> numeric rating.
        person1: Key of the first person in ``prefer``.
        person2: Key of the second person in ``prefer``.

    Returns:
        -1 when the two people share no rated item; 0 when either person's
        shared ratings have no spread (the correlation is undefined);
        otherwise the correlation coefficient.
    """
    shared = [item for item in prefer[person1] if item in prefer[person2]]
    n = len(shared)
    if n == 0:
        return -1

    ratings1 = [prefer[person1][item] for item in shared]
    ratings2 = [prefer[person2][item] for item in shared]

    sum1 = sum(ratings1)
    sum2 = sum(ratings2)
    sum1_sq = sum(r ** 2 for r in ratings1)
    sum2_sq = sum(r ** 2 for r in ratings2)
    sum_products = sum(r1 * r2 for r1, r2 in zip(ratings1, ratings2))

    covariance = sum_products - (sum1 * sum2 / n)
    spread1 = sum1_sq - sum1 ** 2 / n
    spread2 = sum2_sq - sum2 ** 2 / n

    # For (near-)constant ratings, floating-point cancellation can leave a
    # spread at zero or slightly below it. Checking each spread separately
    # keeps sqrt() from being handed a negative product (ValueError) and
    # keeps two negative spreads from passing as a valid positive product.
    if spread1 <= 0 or spread2 <= 0:
        return 0

    denominator = sqrt(spread1 * spread2)
    if denominator == 0:
        # The product of two tiny positive spreads can underflow to zero.
        return 0
    return covariance / denominator
def topMatches(prefer, person, n=1, similarity=sim_pearson):
    """Return the ``n`` people most similar to ``person``.

    Args:
        prefer: Mapping of person -> mapping of item -> numeric rating.
        person: Key of the person to find matches for; never matched
            against themselves.
        n: Maximum number of matches to return.
        similarity: Callable ``(prefer, person, other) -> score`` used to
            compare ``person`` with every other key in ``prefer``.

    Returns:
        A list of ``(score, other)`` tuples ordered from highest to lowest,
        truncated to the first ``n`` entries.
    """
    scored = []
    for other in prefer:
        if other != person:
            scored.append((similarity(prefer, person, other), other))

    # Sort ascending, then flip, so the best matches come first.
    ascending = sorted(scored)
    return ascending[::-1][:n]
def getRecommendations(prefer, person, similarity=sim_pearson):
    """Rank items ``person`` has not rated by similarity-weighted rating.

    Every other person whose similarity to ``person`` is strictly positive
    contributes their ratings, weighted by that similarity. An item's score
    is the weighted rating total divided by the total similarity of the
    people who rated it.

    Args:
        prefer: Mapping of person -> mapping of item -> numeric rating.
        person: Key of the person to build recommendations for.
        similarity: Callable ``(prefer, person, other) -> score``.

    Returns:
        A list of ``(predicted_rating, item)`` tuples ordered from highest
        to lowest; items already present in ``prefer[person]`` are excluded.
    """
    weighted_totals = {}
    weight_sums = {}

    for other in prefer:
        if other == person:
            continue

        weight = similarity(prefer, person, other)
        if weight <= 0:
            # Non-positive similarity: this person's ratings are ignored.
            continue

        for item in prefer[other]:
            if item in prefer[person]:
                continue
            weighted_totals[item] = (
                weighted_totals.get(item, 0) + prefer[other][item] * weight
            )
            weight_sums[item] = weight_sums.get(item, 0) + weight

    # Normalise by the accumulated weights, then order best-first.
    ranked = sorted(
        (total / weight_sums[item], item)
        for item, total in weighted_totals.items()
    )
    ranked.reverse()
    return ranked
if __name__ == "__main__":
    # Demo / smoke test: prints each similarity measure, the best match and
    # the recommendations for a small hand-written ratings table.
    print("\n测试计算欧几里得距离的方法sim_distance()....")
    Prefer = {
        "tommy": {'War': 2.3, 'The lord of wings': 3.0, 'Kongfu': 5.0},
        "lily": {'War': 2.0, 'The lord of wings': 3.6, 'Kongfu': 4.1},
        "jim": {'War': 1.9, 'The lord of wings': 4.0, 'Beautiful America': 4.7, 'the big bang': 1.0},
        "jack": {'War': 2.8, 'The lord of wings': 3.5, 'Kongfu': 5.5},
    }

    # Euclidean-distance similarity. The people passed to each call must be
    # the pair named in the printed label.
    print("sim_distance(dic,'lily','jim') = ",sim_distance(Prefer, 'lily', 'jim'))
    print("sim_distance(dic,'tommy','jim') = ",sim_distance(Prefer, 'tommy', 'jim'))
    print("sim_distance(dic,'tommy','lily') = ",sim_distance(Prefer, 'tommy', 'lily'))
    print("sim_distance(dic,'tommy','jack') = ",sim_distance(Prefer, 'tommy', 'jack'))

    # Pearson correlation for the same pairs.
    print("\n测试计算Pearson系数的方法sim_pearson()....")
    print("sim_pearson(dic,'lily','jim') = ",sim_pearson(Prefer, 'lily', 'jim'))
    print("sim_pearson(dic,'tommy','jim') = ",sim_pearson(Prefer, 'tommy', 'jim'))
    print("sim_pearson(dic,'tommy','lily') = ",sim_pearson(Prefer, 'tommy', 'lily'))
    print("sim_pearson(dic,'tommy','jack') = ",sim_pearson(Prefer, 'tommy', 'jack'))

    # Single best match for 'tommy' using the default similarity.
    print("\n测试topMatches()方法......")
    print(topMatches(Prefer, 'tommy'))

    # Items 'tommy' has not rated, ranked by predicted rating.
    print("\n测试推荐方法getRecommendations(prefer, person, similarity=sim_pearson)......")
    print(getRecommendations(Prefer, 'tommy'))