最近做推荐系统,研究了一下Surprise库,使用简单,效果不错。
GitHub 地址:https://github.com/NicolasHug/Surprise
实现功能:
1.数据文件 item_user_rate_time.txt:每行依次为 user、item、rating、timestamp(用户id、项目id、评分、时间戳),字段之间以逗号分隔(对应代码中 Reader 的 sep=',')
2.数据读取 训练模型
import os
from surprise import SVD
from surprise import SVDpp
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise import KNNBasic
from surprise import BaselineOnly
from surprise import Reader
from surprise.model_selection import KFold
from surprise.model_selection import cross_validate
from surprise.model_selection import GridSearchCV
# Load the ratings file. Each line is parsed as "user,item,rating,timestamp"
# (field order from line_format, separator from sep).
file_path = os.path.expanduser('item_user_rate_time.txt')
reader = Reader(
    line_format='user item rating timestamp',
    sep=',',
)
surprise_data = Dataset.load_from_file(file_path, reader=reader)

# Build the training set from the whole dataset (no hold-out split here).
all_trainset = surprise_data.build_full_trainset()

# User-based KNN model. sim_options may additionally carry a 'name' key to
# pick the similarity measure, e.g. {'name': 'cosine', 'user_based': True};
# the options mentioned are cosine / msd / pearson / pearson_baseline.
algo = KNNBasic(
    k=40,
    min_k=3,
    sim_options={'user_based': True},
)
algo.fit(all_trainset)
3.找到相似用户
def getSimilarUsers(top_k, u_id):
    """Yield the raw ids of the users most similar to ``u_id``.

    Relies on the module-level ``algo`` model, which must already be fitted.

    Args:
        top_k: Number of neighbours requested from ``algo.get_neighbors``.
        u_id: Raw user id, as it appears in the ratings file.

    Returns:
        A generator of raw user ids. The neighbour lookup runs immediately;
        the inner-id to raw-id conversion happens lazily as the generator
        is consumed.

    Unknown ids are not handled here: whatever ``to_inner_uid`` raises
    propagates to the caller.
    """
    neighbor_inner_ids = algo.get_neighbors(
        algo.trainset.to_inner_uid(u_id),
        k=top_k,
    )
    return (
        algo.trainset.to_raw_uid(neighbor)
        for neighbor in neighbor_inner_ids
    )
# Example: the 5 users most similar to user '13321'.
list(getSimilarUsers(top_k=5, u_id='13321'))
# Out: ['15469', '6018', '264174', '279130', '23779']
4.找到相似项目:将 sim_options 中的 user_based 设置为 False,基于项目相似度进行计算
# Item-based KNN model: user_based=False makes the similarities item-to-item.
# sim_options may additionally carry a 'name' key to pick the similarity
# measure, e.g. {'name': 'cosine', 'user_based': False}; the options
# mentioned are cosine / msd / pearson / pearson_baseline.
item_algo = KNNBasic(
    k=40,
    min_k=3,
    sim_options={'user_based': False},
)
item_algo.fit(all_trainset)
def getSimilarItems(top_k, item_id):
    """Yield the raw ids of the items most similar to ``item_id``.

    Relies on the module-level ``item_algo`` model, which must already be
    fitted.

    Args:
        top_k: Number of neighbours requested from ``item_algo.get_neighbors``.
        item_id: Raw item id, as it appears in the ratings file.

    Returns:
        A generator of raw item ids. The neighbour lookup runs immediately;
        the inner-id to raw-id conversion happens lazily as the generator
        is consumed.

    Unknown ids are not handled here: whatever ``to_inner_iid`` raises
    propagates to the caller.
    """
    neighbor_inner_ids = item_algo.get_neighbors(
        item_algo.trainset.to_inner_iid(item_id),
        k=top_k,
    )
    return (
        item_algo.trainset.to_raw_iid(neighbor)
        for neighbor in neighbor_inner_ids
    )
# Example: the 10 items most similar to item '761'.
list(getSimilarItems(top_k=10, item_id='761'))
# Out: ['288', '313', '329', '499', '516']
# NOTE(review): the sample output lists only 5 ids although top_k=10 --
# presumably truncated or taken from a run with a different top_k; confirm.