2.5.3 推荐系统算法---基于内容的电影推荐:为用户产生TOP-N推荐结果

# ......

user_profile = create_user_profile()

watch_record = pd.read_csv("datasets/ml-latest-small/ratings.csv", usecols=range(2),dtype={"userId": np.int32, "movieId": np.int32})

watch_record = watch_record.groupby("userId").agg(list)

for uid, interest_words in user_profile.items():
    result_table = {} # 电影id:[0.2,0.5,0.7]
    for interest_word, interest_weight in interest_words:
        related_movies = inverted_table[interest_word]
        for mid, related_weight in related_movies:
            _ = result_table.get(mid, [])
            _.append(interest_weight)    # 只考虑用户的兴趣程度
            # _.append(related_weight)    # 只考虑兴趣词与电影的关联程度
            # _.append(interest_weight*related_weight)    # 二者都考虑
            result_table.setdefault(mid, _)

    rs_result = map(lambda x: (x[0], sum(x[1])), result_table.items())
    rs_result = sorted(rs_result, key=lambda x:x[1], reverse=True)[:100]
    print(uid)
    pprint(rs_result)
    break

    # 历史数据  ==>  历史兴趣程度 ==>  历史推荐结果       离线推荐    离线计算
    # 在线推荐 ===>    娱乐(王思聪)   ===>   我 ==>  王思聪 100%  
    # 近线:最近1天、3天、7天           实时计算

你可能感兴趣的:(推荐系统)