注:行为用户,列为物品(0 表示该用户尚未评价该物品),相似度是根据曼哈顿距离计算的。根据用户对物品的评价(1-10分)来计算用户间、物品间的相似度。
A B C D E F
tom 6 7 9 2 4 10
jerry 3 7 2 5 6 9
hank 5 7 0 2 8 5
alex 0 6 8 5 6 9
cary 6 8 4 6 4 6
jack 2 7 2 5 6 9
ben 1 0 0 6 6 0
#encoding=utf-8
class Recommendation:
def __init__(self):
self.information = {'Tom': {'A': 6.0, 'B': 7.0, 'C': 9.0,'D': 2.0, 'E': 4.0, 'F': 10.0},
'Jerry': {'A': 3.0, 'B': 7.0, 'C': 2.0,'D': 5.0, 'E': 6.0, 'F': 9.0},
'Hank': {'A': 5.0, 'B': 7.0,'D': 2.0, 'E': 8.0, 'F': 5.0},
'Alex': {'B': 6.0, 'C': 8.0,'D': 5.0, 'E': 6.0,'F': 9.0},
'Cary': {'A': 6, 'B': 8, 'C': 4,'D': 6,'E': 4,'F': 6,},
'Jack': {'A': 2.0, 'B': 7.0,'C': 4,'D': 5.0,'E': 6.0, 'F': 9.0},
'Ben': {'A': 2.0,'D': 6.0,'E': 6.0}}
def _cal_mh_dis(self, user1, user2):
#计算用户1与用户2的曼哈顿距离
goods = self._transform()
distance = 0
for good in goods:
user1_good_score = self.information[user1].get(good, 0)
user2_good_score = self.information[user2].get(good, 0)
score_diff = user1_good_score - user2_good_score
if user1_good_score == 0 or user2_good_score == 0:
score_diff = 0
distance += abs(score_diff)
#返回两个用户对相同物品的评价差值总和
return distance
def _similarity(self, user1, user2):
#计算用户1和用户2之间的相似度
distance = self._cal_mh_dis(user1, user2)
#归一化处理,temp越大,两个用户之间的相似度越低
temp = 1 / (distance + 1)
return temp
def _transform(self):
"""
将矩阵用户——物品的键和值进行调换为物品——用户
return:键值调换后的dict: {key=good, value={key=user, value=score}}
如下所示:
{'A': {'Tom': 6.0, 'Jerry': 3.0, 'Hank': 5.0, 'Cary': 6, 'Jack': 2.0, 'Ben': 2.0},
'B': {'Tom': 7.0, 'Jerry': 7.0, 'Hank': 7.0, 'Alex': 6.0, 'Cary': 8, 'Jack': 7.0},}
"""
result = {}
for user in self.information:
for item in self.information[user]:
result.setdefault(item, {})
result[item][user] = self.information[user][item]
return result
def _top_matches(self, user, k=2):
#寻找与user相似度最高的3个用户
distances = {}
for p in self.information.keys():
if p != user:
distances[p] = self._similarity(p, user)
return sorted(distances.items(), key=lambda x: x[1], reverse=True)[0:k]
def recommend_by_people(self, user):
#基于用户的协同过滤算法
#为用户未购买过的物品打分:基于与用户最相似的其他k个用户对当前物品的评价的加权平均;
top_k_users = self._top_matches(user, k=2)
goods = self._transform()
#推荐的物品, 及推荐分数; 相似度之和; top_k_users的分数之和
recommend = {}
simi_sum = 0
score = 0
for good in goods:
if good not in self.information[user].keys():
for i in range(len(top_k_users)):
current_user = top_k_users[i][0]
if good in self.information[current_user].keys():
score += self.information[current_user][good] * top_k_users[i][1]
simi_sum += top_k_users[i][1]
recommend[good] = score / simi_sum
return recommend
def _cal_goods_similarity(self):
#计算物品相似度字典
#good_simi_dict: {key=物品, value={key=物品, value=相似度}}
goods = self._transform()
users = self.information.keys() #用户列表
simi_good = 0
good_simi_dict = {}
for i in goods:
inner_dict = {}
for j in goods:
if i != j:
for p in users:
if p in goods[i].keys() and p in goods[j].keys():
simi_good += abs(goods[i][p] - goods[j][p])
simi_good = 1 / (simi_good + 1)
inner_dict[j] = simi_good
good_simi_dict[i] = inner_dict
return good_simi_dict
def recommend_by_item(self, user):
#依据物品进行推荐
good_simi_dict = self._cal_goods_similarity()
goods = self._transform().keys()
recommend = {}
for good in goods:
simi = 0
score = 0
#good表示当前用户未购买过的物品
if good not in self.information[user].keys():
for i in good_simi_dict[good].keys():
if i in self.information[user].keys():
score += good_simi_dict[good][i] * self.information[user].get(i, 0)
simi += good_simi_dict[good][i]
simi = score / simi
recommend[good] = simi
return recommend
if __name__ == "__main__":
    # Demo driver: exercises each feature once and prints the result.
    recommender = Recommendation()

    # Similarity score between two specific users.
    print("当前两用户的相似度为:{0}".format(recommender._similarity('Jerry', 'Jack')))

    # The k users most similar to a given user.
    print("与当前用户相似度最高的是:{0}".format(recommender._top_matches("Tom", k=2)))

    # User-based collaborative-filtering recommendation.
    print("给当前用户推荐是:{0}".format(recommender.recommend_by_people("Ben")))

    # Item-based recommendation for the same user.
    item_based = recommender.recommend_by_item("Ben")
    print("基于物品相似度推荐物品影:{0}".format(item_based))