项亮《推荐系统实践》部分代码复现

1、基于用户的协同过滤（UserCF）算法

import math
import operator


def UserSimilarity(train):  # P45: the most basic UserCF
    """Compute the pairwise user similarity by comparing every pair of users.

    For each ordered pair of distinct users ``(u, v)`` the similarity is
    ``|N(u) & N(v)| / sqrt(|N(u)| * |N(v)|)``, where ``N(x)`` is the set of
    item keys of ``train[x]``. The value is rounded to two decimals.

    Args:
        train: dict mapping each user to a dict keyed by the items that user
            interacted with (the dict values are not used here).

    Returns:
        dict mapping each user ``u`` to ``{v: similarity}`` for every other
        user ``v``. Pairs with no common item are present with value 0.0.
        When ``train`` holds fewer than two users the result is empty.
    """
    W = dict()
    for u, u_items in train.items():
        for v, v_items in train.items():
            if u == v:
                continue
            common = len(u_items.keys() & v_items.keys())
            norm = math.sqrt(len(u_items) * len(v_items))
            # A user without any item cannot overlap with anyone; report 0.0
            # instead of dividing by a zero norm.
            similarity = round(common / norm, 2) if norm else 0.0
            W.setdefault(u, {})[v] = similarity
    return W


def UserSimilarity2(train):  # P46: similarity via an item -> users inverted index
    """Compute user similarity from an inverted item-to-users table.

    Only user pairs that share at least one item are visited. The similarity
    of such a pair is ``shared / sqrt(n_u * n_v)`` rounded to two decimals,
    where ``shared`` is the number of common items and ``n_x`` is the number
    of items of user ``x``.

    Args:
        train: dict mapping each user to a dict keyed by item.

    Returns:
        dict mapping user ``u`` to ``{v: similarity}``. Pairs without a common
        item are absent, and a user sharing no item with anyone has no entry.
    """
    # Inverted index: item -> set of users who interacted with it.
    users_by_item = {}
    for user, items in train.items():
        for item in items:
            users_by_item.setdefault(item, set()).add(user)

    shared_counts = {}  # shared_counts[u][v]: number of items u and v both have
    item_counts = {}    # item_counts[u]: number of items u has
    for audience in users_by_item.values():
        for first in audience:
            item_counts[first] = item_counts.get(first, 0) + 1
            for second in audience:
                if first != second:
                    row = shared_counts.setdefault(first, {})
                    row[second] = row.get(second, 0) + 1

    # Normalise every co-occurrence count by the two users' item counts.
    return {
        first: {
            second: round(
                shared / math.sqrt(item_counts[first] * item_counts[second]), 2
            )
            for second, shared in row.items()
        }
        for first, row in shared_counts.items()
    }


def UseSimilarity3(train):  # P48: improved UserCF
    """Compute user similarity with a penalty on widely shared items.

    Works like the inverted-index variant, but every common item ``i`` adds
    ``1 / log(1 + |N(i)|)`` to the pair's overlap instead of 1, where
    ``|N(i)|`` is the number of users who have item ``i``. The accumulated
    overlap is divided by ``sqrt(n_u * n_v)`` (``n_x`` being the number of
    items of user ``x``) and rounded to two decimals.

    Args:
        train: dict mapping each user to a dict keyed by item.

    Returns:
        dict mapping user ``u`` to ``{v: similarity}``. Pairs without a common
        item are absent, and a user sharing no item with anyone has no entry.
    """
    # Inverted index: item -> set of users who interacted with it.
    audience_of = {}
    for user, items in train.items():
        for item in items:
            if item in audience_of:
                audience_of[item].add(user)
            else:
                audience_of[item] = {user}

    weighted_overlap = {}  # weighted_overlap[u][v]: sum of item penalties
    items_per_user = {}    # items_per_user[u]: number of items u has
    for audience in audience_of.values():
        # The more users an item has, the less it contributes to a pair.
        penalty = 1 / math.log(1 + len(audience))
        for first in audience:
            items_per_user[first] = items_per_user.get(first, 0) + 1
            for second in audience:
                if first == second:
                    continue
                row = weighted_overlap.setdefault(first, {})
                row[second] = row.get(second, 0) + penalty

    similarity = {}
    for first, row in weighted_overlap.items():
        scores = {}
        for second, weight in row.items():
            norm = math.sqrt(items_per_user[first] * items_per_user[second])
            scores[second] = round(weight / norm, 2)
        similarity[first] = scores
    return similarity


def Recommend(train, user, W, K):   # P47: simple recommendation
    """Score the items ``user`` has not interacted with yet.

    The ``K`` users with the highest similarity to ``user`` are taken from
    ``W[user]``. Each of their items that ``user`` does not already have
    receives ``similarity * rating``, summed over those neighbours.

    Args:
        train: dict mapping each user to ``{item: rating}``.
        user: the user to recommend for; must be a key of ``train``.
        W: dict mapping each user to ``{other_user: similarity}``.
        K: number of most similar users to consider.

    Returns:
        dict mapping each candidate item to its accumulated score. It is
        empty when ``user`` has no row in ``W``.

    Raises:
        KeyError: if ``user`` (or one of its neighbours in ``W``) is not a
            key of ``train``.
    """
    rank = dict()
    interacted_items = train[user]
    # The inverted-index similarity functions omit users who share no item
    # with anyone, so a missing row means "no neighbours", not an error.
    neighbours = sorted(
        W.get(user, {}).items(), key=operator.itemgetter(1), reverse=True
    )[0:K]
    for v, wuv in neighbours:
        for i, rvi in train[v].items():
            if i in interacted_items:
                continue
            rank[i] = rank.get(i, 0) + wuv * rvi
    return rank


# Toy data set: every user maps to the items they interacted with (rating 1).
train = {
    'A': {'a': 1, 'b': 1, 'd': 1},
    'B': {'a': 1, 'c': 1},
    'C': {'b': 1, 'e': 1},
    'D': {'c': 1, 'd': 1, 'e': 1},
}

# Build the user-user similarity table with each of the three variants.
W = UserSimilarity(train)
W2 = UserSimilarity2(train)
W3 = UseSimilarity3(train)

# Score the unseen items of user 'A' using the 3 most similar users from W.
result = Recommend(train, 'A', W, 3)

# Uncomment to inspect the similarity tables and the final scores.
# print(W)
# print(W2)
# print(W3)
# print(result)

你可能感兴趣的:(python,python,推荐系统,算法)