电影评分预测案例--基于用户协同过滤的推荐

基于电影评分数据的推荐案例:

import numpy as np
import pandas as pd

dtype = {'userId': np.int32, 'movieId': np.int32, 'rating':np.float32}
ratings = pd.read_csv('./ml-latest-small/ratings.csv', dtype=dtype, usecols=range(3))

# 构建透视表 找到用户和电影之间的评分关系
ratings_matrix = ratings.pivot_table(values=['rating'],index=['userId'], columns=['movieId'])
ratings_matrix

# 计算用户-物品相似度(皮尔逊相关系数)

# 用户:
user_sim = ratings_matrix.T.corr()

def predict(uid, iid, ratings_matrix, user_sim):
    """
    预测给定用户对给定物品的评分
    """
    #1.找出uid用户的相似用户
    sim_users = user_sim[uid].drop([uid]).dropna()
    # 相似用户筛选规则,正相关用户
    sim_users = sim_users.where(sim_users>0).dropna()
    
    #2.从uid用户近邻相似用户中筛选对iid有过评分的用户
    ids = set(ratings_matrix.loc[:, ('rating',iid)].dropna().index) & set(sim_users.index)
    final_users = sim_users.loc[list(ids)]
    
    #3.结合uid用户与其近邻用户的相似度预测uid用户对iid物品的评分
    sum_up = 0     # 评分预测公式分子
    sum_down = 0   # 评分预测公式分母
    for sim_uid, sim_v in final_users.iteritems():
        # 近邻用户的评分数据
        sim_user_rated_movies = ratings_matrix.loc[sim_uid].dropna()
        # 近邻用户对iid物品评分
        sim_user_rating_for_item = sim_user_rated_movies[iid]
        # 计算分子分母
        sum_up += sim_v * sim_user_rating_for_item
        sum_down += sim_v
        
    # 计算预测的评分
    pred_rating = sum_up/sum_down
    print(f"预测出用户{uid}对电影{iid}评分:{pred_rating}")
    
    return round(pred_rating, 2)

def predict_all(uid, ratings_matrix, user_sim):
    # 预测全部评分
    items_ids = ratings_matrix.columns
    
    for iid in items_ids:
        try:
            rating = predict(uid, iid, ratings_matrix, user_sim)
        except Exception as e:
            print(e)
        else:
            yield uid, iid, rating
            
# 预测用户1对所有物品的评分
for i in predict_all(1,ratings_matrix,user_sim):
    pass      

后续可考虑添加过滤规则和Top-N推荐。

你可能感兴趣的:(机器学习,python,推荐系统,推荐系统)