基于协同过滤(用户和项目)的推荐代码python实现

python实现基于用户和项目的协同过滤算法

注:下表中行为用户,列为物品,0 表示该用户尚未对该物品评分。根据用户对物品的评价(1-10分),用曼哈顿距离分别计算用户间、物品间的相似度。

   A B C D E F
tom 6 7 9 2 4 10
jerry 3 7 2 5 6 9
hank 5 7 0 2 8 5
alex 0 6 8 5 6 9
cary 6 8 4 6 4 6
jack 2 7 2 5 6 9
ben 1 0 0 6 6 0

#encoding=utf-8
class Recommendation:
    """Toy collaborative-filtering recommender over an in-memory rating table.

    Ratings are stored in ``self.information`` as ``{user: {good: score}}``;
    a good that is absent from a user's dict has not been rated by that user.
    Similarity, for users and for goods alike, is ``1 / (1 + d)`` where ``d``
    is the Manhattan distance over the co-rated entries.

    NOTE: two users (or two goods) that share no co-rated entry get distance 0
    and therefore the maximum similarity 1.0.
    """

    def __init__(self, information=None):
        """Create the recommender.

        Args:
            information: optional ``{user: {good: score}}`` mapping. When
                omitted, the built-in sample data set is used.
        """
        if information is None:
            information = {
                'Tom': {'A': 6.0, 'B': 7.0, 'C': 9.0, 'D': 2.0, 'E': 4.0, 'F': 10.0},
                'Jerry': {'A': 3.0, 'B': 7.0, 'C': 2.0, 'D': 5.0, 'E': 6.0, 'F': 9.0},
                'Hank': {'A': 5.0, 'B': 7.0, 'D': 2.0, 'E': 8.0, 'F': 5.0},
                'Alex': {'B': 6.0, 'C': 8.0, 'D': 5.0, 'E': 6.0, 'F': 9.0},
                'Cary': {'A': 6, 'B': 8, 'C': 4, 'D': 6, 'E': 4, 'F': 6},
                'Jack': {'A': 2.0, 'B': 7.0, 'C': 4, 'D': 5.0, 'E': 6.0, 'F': 9.0},
                'Ben': {'A': 2.0, 'D': 6.0, 'E': 6.0},
            }
        self.information = information

    def _cal_mh_dis(self, user1, user2):
        """Return the Manhattan distance between the ratings of two users.

        Only goods rated by both users contribute. A score of 0 is treated
        exactly like a missing rating and is skipped.

        Raises:
            KeyError: if either user is not in ``self.information``.
        """
        ratings1 = self.information[user1]
        ratings2 = self.information[user2]
        distance = 0
        for good, score1 in ratings1.items():
            score2 = ratings2.get(good, 0)
            if score1 == 0 or score2 == 0:
                continue
            distance += abs(score1 - score2)
        return distance

    def _similarity(self, user1, user2):
        """Return the similarity of two users, a float in (0, 1].

        The larger the distance, the smaller the returned value; identical
        ratings on every co-rated good give 1.0.
        """
        distance = self._cal_mh_dis(user1, user2)
        # 1.0 keeps the division a float division whatever the score types.
        return 1.0 / (distance + 1)

    def _transform(self):
        """Return the rating table transposed to ``{good: {user: score}}``.

        Example (truncated)::

            {'A': {'Tom': 6.0, 'Jerry': 3.0, 'Hank': 5.0, ...},
             'B': {'Tom': 7.0, 'Jerry': 7.0, 'Hank': 7.0, ...}}
        """
        result = {}
        for user, ratings in self.information.items():
            for good, score in ratings.items():
                result.setdefault(good, {})[user] = score
        return result

    def _top_matches(self, user, k=2):
        """Return the ``k`` users most similar to ``user``.

        Returns:
            A list of ``(other_user, similarity)`` tuples, most similar first.
            Users with equal similarity keep their order in
            ``self.information`` because the sort is stable.
        """
        similarities = {
            other: self._similarity(other, user)
            for other in self.information
            if other != user
        }
        ranked = sorted(similarities.items(), key=lambda pair: pair[1], reverse=True)
        return ranked[:k]

    def recommend_by_people(self, user, k=2):
        """Predict scores for the goods ``user`` has not rated (user-based CF).

        Each prediction is the similarity-weighted average of the scores that
        the ``k`` most similar users gave to that good.

        Args:
            user: the user to recommend for.
            k: how many nearest neighbours to consult (default 2).

        Returns:
            ``{good: predicted_score}``. A good that none of the neighbours
            rated is left out, since there is nothing to average.
        """
        neighbours = self._top_matches(user, k=k)
        already_rated = self.information[user]

        recommend = {}
        for good in self._transform():
            if good in already_rated:
                continue
            # Accumulators are per good: sharing them across goods would mix
            # earlier goods' scores into every later prediction.
            weighted_sum = 0
            weight_total = 0
            for neighbour, similarity in neighbours:
                neighbour_ratings = self.information[neighbour]
                if good in neighbour_ratings:
                    weighted_sum += neighbour_ratings[good] * similarity
                    weight_total += similarity
            if weight_total:
                recommend[good] = weighted_sum / weight_total
        return recommend

    def _cal_goods_similarity(self):
        """Return the pairwise similarity of goods.

        The distance between two goods is the sum of absolute score
        differences over the users who rated both of them.

        Returns:
            ``{good: {other_good: similarity}}`` for every ordered pair of
            distinct goods.
        """
        goods = self._transform()
        good_simi_dict = {}
        for good_i, raters_i in goods.items():
            inner_dict = {}
            for good_j, raters_j in goods.items():
                if good_i == good_j:
                    continue
                # Start every pair from zero so one pair's result cannot leak
                # into the next pair's distance.
                distance = 0
                for rater, score_i in raters_i.items():
                    if rater in raters_j:
                        distance += abs(score_i - raters_j[rater])
                inner_dict[good_j] = 1.0 / (distance + 1)
            good_simi_dict[good_i] = inner_dict
        return good_simi_dict

    def recommend_by_item(self, user):
        """Predict scores for the goods ``user`` has not rated (item-based CF).

        Each prediction is the average of the user's own scores, weighted by
        how similar each rated good is to the candidate good.

        Returns:
            ``{good: predicted_score}``. A good is left out when the user has
            rated nothing it can be compared with.
        """
        good_simi_dict = self._cal_goods_similarity()
        already_rated = self.information[user]

        recommend = {}
        for good, similar_goods in good_simi_dict.items():
            if good in already_rated:
                continue
            weighted_sum = 0
            weight_total = 0
            for other_good, similarity in similar_goods.items():
                if other_good in already_rated:
                    weighted_sum += similarity * already_rated[other_good]
                    weight_total += similarity
            # Guard against dividing by zero for a user with no usable ratings.
            if weight_total:
                recommend[good] = weighted_sum / weight_total
        return recommend


if __name__ == "__main__":
    recommender = Recommendation()

    # Similarity between two specific users.
    pair_similarity = recommender._similarity('Jerry', 'Jack')
    print("当前两用户的相似度为:{0}".format(pair_similarity))

    # The k users most similar to a given user.
    nearest_users = recommender._top_matches("Tom", k=2)
    print("与当前用户相似度最高的是:{0}".format(nearest_users))

    # Recommendation via user-based collaborative filtering.
    by_people = recommender.recommend_by_people("Ben")
    print("给当前用户推荐是:{0}".format(by_people))

    # Recommendation via item similarity.
    by_item = recommender.recommend_by_item("Ben")
    print("基于物品相似度推荐物品影:{0}".format(by_item))

你可能感兴趣的:(推荐算法,推荐算法,算法,python)