推荐系统常用评价指标及实现

code example websites:
https://github.com/samlobel/RaCT_CF/blob/master/utils/evaluation_functions.py
https://github.com/dawenl/cofactor/blob/master/src/rec_eval.py
https://python.hotexamples.com/examples/bottleneck/-/argpartsort/python-argpartsort-function-examples.html

import numpy as np
import bottleneck as bn
import sys,math

hit rate implementations:
https://medium.com/@rishabhbhatia315/recommendation-system-evaluation-metrics-3f6739288870

def HR_at_k(X_pred, X_true, k=10):
    """Mean hit ratio of the top-k recommendations.

    For each user, counts how many of the k highest-scored items are
    relevant (nonzero in X_true) and divides by k; averaged over users
    this is equivalent to precision@k.

    Parameters
    ----------
    X_pred : (n_users, n_items) array of predicted scores.
    X_true : (n_users, n_items) array; nonzero entries mark relevant items.
    k : int, size of the recommendation list.

    Returns
    -------
    float : mean over users of (#hits in top-k) / k.
    """
    batch_users, n_items = X_pred.shape
    if k >= n_items:
        # fewer items than k: every item is "recommended"
        # (np.argpartition would raise for kth >= n_items)
        X_pred_binary = np.ones_like(X_pred, dtype=bool)
    else:
        # unordered indices of the k best-scored items per user;
        # np.argpartition replaces the bottleneck dependency
        idx = np.argpartition(-X_pred, k, axis=1)
        X_pred_binary = np.zeros_like(X_pred, dtype=bool)
        X_pred_binary[np.arange(batch_users)[:, np.newaxis], idx[:, :k]] = True
    X_true_binary = (X_true > 0)
    hits_num = np.logical_and(X_true_binary, X_pred_binary).sum(axis=1)
    return np.mean(hits_num / k)

def hit_rate1(X_pred, X_true, topk=5):
    """Average fraction of the top-k list that hits a relevant item.

    Hits are divided by topk and by the number of users, so this is
    precision@topk averaged over users.
    """
    n_users = len(X_pred)
    # collect the relevant item ids of every user
    relevant = [[] for _ in range(n_users)]
    rows, cols = np.where(X_true != 0)
    for u, item in zip(rows, cols):
        relevant[u].append(item)
    # full descending ranking, then keep the first topk columns
    top_items = np.argsort(-X_pred)[:, :topk]
    # per-user overlap between the recommendation list and the truth
    total_hits = sum(
        len(set(top_items[u]) & set(relevant[u])) for u in range(n_users)
    )
    return total_hits / topk / n_users

precision & recall:

def precision_recall_at_k(X_pred, X_true, k=10):
    """Precision@k and recall@k averaged over users with ground truth.

    Users without any relevant item are excluded from both averages.

    Parameters
    ----------
    X_pred : (n_users, n_items) array of predicted scores.
    X_true : (n_users, n_items) array; nonzero entries mark relevant items.
    k : int, size of the recommendation list.

    Returns
    -------
    (mean_precision, mean_recall) : tuple of floats. Returns (0.0, 0.0)
    when no user has any relevant item (the original raised
    ZeroDivisionError in that case).
    """
    num_users = len(X_pred)
    # collect the relevant item ids of every user
    actual = [[] for _ in range(num_users)]
    rows, cols = np.where(X_true != 0)
    for u, item in zip(rows, cols):
        actual[u].append(item)
    # full descending ranking truncated to the first k columns
    predicted = np.argsort(-X_pred)[:, :k]
    sum_precision = 0.0
    sum_recall = 0.0
    true_users = 0
    for i in range(num_users):
        act_set = set(actual[i])
        if not act_set:
            continue  # no ground truth for this user
        hits = len(act_set & set(predicted[i]))
        sum_precision += hits / float(k)
        sum_recall += hits / float(len(act_set))
        true_users += 1
    if true_users == 0:
        # no user has any relevant item: averages are undefined, report 0
        return 0.0, 0.0
    return sum_precision / true_users, sum_recall / true_users


# NOTE(review): the original version produced results that were far too
# small because (a) the top-k mask was built with `pred_idx < k`, which
# compares the *item indices* stored by argpartition to k instead of
# selecting the first k partitioned columns, and (b) the fp/fn formulas
# were swapped, so p was actually recall and r was precision.
def precision_recall(x_pred, x_true, k=10):
    """Per-user precision@k and recall@k.

    Parameters
    ----------
    x_pred : (n_users, n_items) array of predicted scores.
    x_true : (n_users, n_items) array; entries > 0 mark relevant items.
    k : int, size of the recommendation list.

    Returns
    -------
    (p, r) : two (n_users,) arrays with precision@k and recall@k.
    """
    epsilon = 1e-10  # keeps the denominators nonzero
    batch_users = x_pred.shape[0]
    # indices of the k highest-scored items per user (unordered)
    top_idx = np.argpartition(-x_pred, k, axis=1)[:, :k]
    x_pred_binary = np.zeros_like(x_pred)
    x_pred_binary[np.arange(batch_users)[:, np.newaxis], top_idx] = 1
    # np.int was removed in NumPy >= 1.24; the builtin int is correct here
    x_true_binary = (x_true > 0).astype(int)
    tp = np.sum(x_pred_binary * x_true_binary, axis=1)
    fp = np.sum(x_pred_binary * (1 - x_true_binary), axis=1)  # recommended but irrelevant
    fn = np.sum((1 - x_pred_binary) * x_true_binary, axis=1)  # relevant but missed
    p = tp / (tp + fp + epsilon)
    r = tp / (tp + fn + epsilon)
    return p, r

recall的另外一个实现

def Recall_at_k(X_pred, X_true, k=10):
    """Truncated recall@k for each user.

    The denominator is min(k, #relevant items) so a perfect length-k
    list scores 1 even when a user has more than k relevant items.

    Parameters
    ----------
    X_pred : (n_users, n_items) array of predicted scores.
    X_true : (n_users, n_items) array; entries > 0 mark relevant items.
    k : int, size of the recommendation list.

    Returns
    -------
    (n_users,) array; users without any relevant item get 0 (the
    original computed 0/0 — a RuntimeWarning plus a NaN that then had
    to be scrubbed with nan_to_num).
    """
    batch_users, n_items = X_pred.shape
    if k >= n_items:
        # fewer items than k: everything is recommended
        # (np.argpartition would raise for kth >= n_items)
        X_pred_binary = np.ones_like(X_pred, dtype=bool)
    else:
        # np.argpartition replaces the bottleneck dependency
        idx = np.argpartition(-X_pred, k, axis=1)
        X_pred_binary = np.zeros_like(X_pred, dtype=bool)
        X_pred_binary[np.arange(batch_users)[:, np.newaxis], idx[:, :k]] = True
    X_true_binary = (X_true > 0)
    tmp = (np.logical_and(X_true_binary, X_pred_binary).sum(axis=1)).astype(
        np.float32)
    denom = np.minimum(k, X_true_binary.sum(axis=1))
    # avoid 0/0 for users without relevant items: recall defined as 0
    recall = np.where(denom > 0, tmp / np.maximum(denom, 1), 0.0)
    return recall

NDCG
注意:以下代码假设相关度为二值(0/1);若 heldout 的取值不在 0-1 范围内,需要先归一化到 0-1 之间。

def NDCG_binary_at_k_batch(X_pred, heldout_batch, k=10):
    """
    Normalized discounted cumulative gain@k for binary relevance.

    ASSUMPTIONS: all the 0's in heldout_batch indicate 0 relevance.

    Parameters
    ----------
    X_pred : (n_users, n_items) array of predicted scores.
    heldout_batch : (n_users, n_items) array of held-out relevance.
    k : int, truncation depth.

    Returns
    -------
    float : NDCG@k averaged over users. Users with no held-out items
    contribute 0 (the original divided 0/0 there, and the resulting
    NaN turned the whole mean into NaN).
    """
    batch_users = X_pred.shape[0]
    rows = np.arange(batch_users)[:, np.newaxis]
    # unordered indices of the k best-scored items per user
    # (np.argpartition replaces the bottleneck dependency)
    idx_topk_part = np.argpartition(-X_pred, k, axis=1)
    topk_part = X_pred[rows, idx_topk_part[:, :k]]
    # sort those k columns so idx_topk lists items in rank order
    idx_part = np.argsort(-topk_part, axis=1)
    idx_topk = idx_topk_part[rows, idx_part]
    # discount template: 1/log2(rank+1) for ranks 1..k
    tp = 1.0 / np.log2(np.arange(2, k + 2))
    DCG = (heldout_batch[rows, idx_topk] * tp).sum(axis=1)
    n_relevant = np.sum(heldout_batch != 0, axis=1)
    IDCG = np.array([(tp[: min(n, k)]).sum() for n in n_relevant])
    # users with an empty heldout row have IDCG == 0 (and DCG == 0);
    # define their NDCG as 0 instead of NaN
    ndcg = np.where(IDCG > 0, DCG / np.maximum(IDCG, 1e-12), 0.0)
    return np.mean(ndcg)

#以下的代码初步结果一样

def NDCG_binary_at_k_batch1(X_pred, heldout_batch, k=10, input_batch=None, normalize=True):
    '''
    Normalized discounted cumulative gain@k for binary relevance.

    ASSUMPTIONS: all the 0's in heldout_batch indicate 0 relevance.
    If normalize is set to False, then we actually return DCG, not NDCG.

    Parameters
    ----------
    X_pred : (n_users, n_items) array of predicted scores. WARNING:
        mutated in place when input_batch is given (seen items are set
        to -inf so they are never recommended).
    heldout_batch : dense ndarray or scipy sparse matrix of held-out
        relevance.
    k : int, truncation depth.
    input_batch : optional matrix of already-seen interactions to mask.
    normalize : divide by the ideal DCG when True.

    Returns
    -------
    float32 array of shape (n_users,).
    '''
    if input_batch is not None:
        # never recommend items the user already interacted with
        X_pred[input_batch.nonzero()] = -np.inf
    batch_users = X_pred.shape[0]
    rows = np.arange(batch_users)[:, np.newaxis]
    # unordered indices of the k best-scored items per user
    # (np.argpartition replaces the bottleneck dependency)
    idx_topk_part = np.argpartition(-X_pred, k, axis=1)
    topk_part = X_pred[rows, idx_topk_part[:, :k]]
    # sort those k columns so idx_topk lists items in rank order
    idx_part = np.argsort(-topk_part, axis=1)
    idx_topk = idx_topk_part[rows, idx_part]
    # discount template: 1/log2(rank+1) for ranks 1..k
    tp = 1. / np.log2(np.arange(2, k + 2))
    maybe_sparse_top_results = heldout_batch[rows, idx_topk]
    # the original used bare `except:` here, which also swallows
    # KeyboardInterrupt/SystemExit; only the missing-attribute case
    # (dense ndarray instead of sparse matrix) is expected
    try:
        top_results = maybe_sparse_top_results.toarray()
    except AttributeError:
        top_results = maybe_sparse_top_results
    try:
        number_non_zero = heldout_batch.getnnz(axis=1)
    except AttributeError:
        number_non_zero = ((heldout_batch > 0) * 1).sum(axis=1)
    DCG = (top_results * tp).sum(axis=1)
    # ideal DCG: best possible ordering of min(n, k) relevant items
    IDCG = np.array([(tp[:min(n, k)]).sum()
                     for n in number_non_zero])
    # floor the denominator: some users have no held-out items at all
    IDCG = np.maximum(0.1, IDCG)
    result = (DCG / IDCG) if normalize else DCG
    return result.astype(np.float32)

sklearn的实现,会比以上实现偏小

from sklearn.metrics import precision_score, recall_score, f1_score
def metrics_sklearn(X_pred, X_true, k=10):
    """NDCG, mean precision@k and mean recall@k via scikit-learn.

    Fixes vs. the original: the top-k mask is built from the first k
    partitioned columns (the old `pred_idx < k` compared stored item
    indices to k and selected the wrong items); `recall_score` no
    longer receives 'macro' as the positional `labels` argument (a
    TypeError on modern scikit-learn); and `ndcg_score` is fed the real
    scores X_pred instead of the binarized mask — binarizing destroys
    the ranking, which is why the old numbers came out too small.

    Returns
    -------
    (ndcg, mean_precision, mean_recall) : tuple of floats.
    """
    from sklearn.metrics import precision_score, recall_score, ndcg_score
    batch_users = X_pred.shape[0]
    # indices of the k highest-scored items per user (unordered);
    # np.argpartition replaces the bottleneck dependency
    top_idx = np.argpartition(-X_pred, k, axis=1)[:, :k]
    x_pred_binary = np.zeros_like(X_pred)
    x_pred_binary[np.arange(batch_users)[:, np.newaxis], top_idx] = 1
    y_true = (X_true > 0).astype(np.int8)
    p = np.empty(batch_users)
    r = np.empty(batch_users)
    for idx in range(batch_users):
        # per-user binary precision/recall of the top-k mask
        p[idx] = precision_score(y_true[idx], x_pred_binary[idx])
        r[idx] = recall_score(y_true[idx], x_pred_binary[idx])
    return ndcg_score(y_true, X_pred, k=k), np.mean(p), np.mean(r)

你可能感兴趣的:(推荐系统常用评价指标及实现)