1基于用户的推荐引擎
DataModel:提供用户、项(item)和偏好数据的存储与访问,用于计算
UserSimilarity:计算用户之间的相似度
UserNeighborhood:计算用户的邻居
Recommender :将上述组件组织在一起,为用户提供item推荐
package com.taobao.afan;
import java.io.File;
import java.util.List;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
public class RecommenderIntro {
public static void main(String[] args) throws Exception{
// TODO Auto-generated method stub
DataModel model = new FileDataModel(new File("./intro.csv")); //加载数据文件
UserSimilarity similarity = new PearsonCorrelationSimilarity(model); //构建相似度计算方式
UserNeighborhood neighborhood =
new NearestNUserNeighborhood(2, similarity, model);
Recommender recommender = new GenericUserBasedRecommender(
model, neighborhood, similarity); //创建推荐引擎
List
recommender.recommend(1, 1); //给用户1推荐一个item
for (RecommendedItem recommendation : recommendations) {
System.out.println(recommendation);
}
}
}
输出:
2011-2-8 12:42:46 org.slf4j.impl.JCLLoggerAdapter info
信息: Creating FileDataModel for file ./intro.csv
2011-2-8 12:42:48 org.slf4j.impl.JCLLoggerAdapter info
信息: Reading file info...
2011-2-8 12:42:48 org.slf4j.impl.JCLLoggerAdapter info
信息: Read lines: 21
2011-2-8 12:42:48 org.slf4j.impl.JCLLoggerAdapter info
信息: Processed 5 users
RecommendedItem[item:104, value:4.253491]
推荐引擎评价
package com.taobao.afan;
import java.io.File;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
/**
 * Scores a user-based recommender with an
 * {@link AverageAbsoluteDifferenceRecommenderEvaluator} over the data in
 * {@code ./intro.csv} and prints the resulting score.
 */
public class EvaluatorInfo {

    /**
     * Runs the evaluation and prints the score to standard output.
     *
     * @param args ignored
     * @throws Exception propagated from data loading or from the evaluation
     */
    public static void main(String[] args) throws Exception {
        // Must stay first: it is called before the evaluator is constructed,
        // exactly as in the original ordering.
        RandomUtils.useTestSeed();

        File preferencesFile = new File("./intro.csv");
        DataModel dataModel = new FileDataModel(preferencesFile);
        RecommenderEvaluator scorer = new AverageAbsoluteDifferenceRecommenderEvaluator();

        // The log line "Beginning evaluation using 0.7 of ..." reflects the first value.
        double trainingPercentage = 0.7;
        double evaluationPercentage = 1.0;

        double result = scorer.evaluate(
                pearsonUserBasedBuilder(), null, dataModel,
                trainingPercentage, evaluationPercentage);
        System.out.println(result);
    }

    /**
     * Creates the builder handed to the evaluator: a user-based recommender
     * using Pearson correlation and a neighborhood of the 2 nearest users.
     */
    private static RecommenderBuilder pearsonUserBasedBuilder() {
        return new RecommenderBuilder() {
            @Override
            public Recommender buildRecommender(DataModel model)
                    throws TasteException {
                UserSimilarity userSimilarity = new PearsonCorrelationSimilarity(model);
                UserNeighborhood nearestUsers =
                        new NearestNUserNeighborhood(2, userSimilarity, model);
                Recommender userBased =
                        new GenericUserBasedRecommender(model, nearestUsers, userSimilarity);
                return userBased;
            }
        };
    }
}
输出:
2011-2-8 13:35:31 org.slf4j.impl.JCLLoggerAdapter info
信息: Creating FileDataModel for file ./intro.csv
2011-2-8 13:35:31 org.slf4j.impl.JCLLoggerAdapter info
信息: Beginning evaluation using 0.7 of FileDataModel[dataFile:/root/workspace/RecommenderIntro/./intro.csv]
2011-2-8 13:35:31 org.slf4j.impl.JCLLoggerAdapter info
信息: Reading file info...
2011-2-8 13:35:31 org.slf4j.impl.JCLLoggerAdapter info
信息: Read lines: 21
2011-2-8 13:35:31 org.slf4j.impl.JCLLoggerAdapter info
信息: Processed 5 users
2011-2-8 13:35:31 org.slf4j.impl.JCLLoggerAdapter info
信息: Processed 5 users
2011-2-8 13:35:31 org.slf4j.impl.JCLLoggerAdapter info
信息: Beginning evaluation of 3 users
2011-2-8 13:35:31 org.slf4j.impl.JCLLoggerAdapter info
信息: Starting timing of 3 tasks in 1 threads
2011-2-8 13:35:31 org.slf4j.impl.JCLLoggerAdapter info
信息: Average time per recommendation: 11ms
2011-2-8 13:35:31 org.slf4j.impl.JCLLoggerAdapter info
信息: Approximate memory used: 1MB / 7MB
2011-2-8 13:35:31 org.slf4j.impl.JCLLoggerAdapter info
信息: Evaluation result: 0.5
0.5
2 评价准确率(precision)和召回率(recall)
package com.taobao.afan;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.IRStatistics;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderIRStatsEvaluator;
import org.apache.mahout.cf.taste.impl.eval.GenericRecommenderIRStatsEvaluator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import java.io.File;
/**
 * Computes precision and recall "at 2" for a user-based recommender over the
 * data in {@code ./intro.csv} and prints both values.
 */
class IREvaluatorIntro {

    /**
     * Runs the evaluation; prints precision on the first line and recall on the second.
     *
     * @param args ignored
     * @throws Exception propagated from data loading or from the evaluation
     */
    public static void main(String[] args) throws Exception {
        File preferencesFile = new File("./intro.csv");
        DataModel dataModel = new FileDataModel(preferencesFile);
        RecommenderIRStatsEvaluator irEvaluator = new GenericRecommenderIRStatsEvaluator();

        // Evaluate precision and recall "at 2".
        int at = 2;
        IRStatistics result = irEvaluator.evaluate(
                userBasedBuilder(),
                null,
                dataModel,
                null,
                at,
                GenericRecommenderIRStatsEvaluator.CHOOSE_THRESHOLD,
                1.0);

        double precision = result.getPrecision();
        System.out.println(precision);
        double recall = result.getRecall();
        System.out.println(recall);
    }

    /**
     * Creates the recommender builder: Pearson-correlation similarity with a
     * neighborhood of the 2 nearest users, wrapped in a user-based recommender.
     */
    private static RecommenderBuilder userBasedBuilder() {
        return new RecommenderBuilder() {
            @Override
            public Recommender buildRecommender(DataModel model) throws TasteException {
                UserSimilarity userSimilarity = new PearsonCorrelationSimilarity(model);
                UserNeighborhood nearestUsers =
                        new NearestNUserNeighborhood(2, userSimilarity, model);
                return new GenericUserBasedRecommender(model, nearestUsers, userSimilarity);
            }
        };
    }
}
输出:
2011-2-8 13:44:52 org.slf4j.impl.JCLLoggerAdapter info
信息: Creating FileDataModel for file ./intro.csv
2011-2-8 13:44:52 org.slf4j.impl.JCLLoggerAdapter info
信息: Reading file info...
2011-2-8 13:44:52 org.slf4j.impl.JCLLoggerAdapter info
信息: Read lines: 21
2011-2-8 13:44:52 org.slf4j.impl.JCLLoggerAdapter info
信息: Processed 5 users
2011-2-8 13:44:52 org.slf4j.impl.JCLLoggerAdapter info
信息: Processed 5 users
2011-2-8 13:44:52 org.slf4j.impl.JCLLoggerAdapter info
信息: Evaluated with user 2 in 29ms
2011-2-8 13:44:52 org.slf4j.impl.JCLLoggerAdapter info
信息: Precision/recall/fall-out: 0.0 / 0.0 / 0.3333333333333333
2011-2-8 13:44:52 org.slf4j.impl.JCLLoggerAdapter info
信息: Processed 5 users
2011-2-8 13:44:52 org.slf4j.impl.JCLLoggerAdapter info
信息: Evaluated with user 4 in 0ms
2011-2-8 13:44:52 org.slf4j.impl.JCLLoggerAdapter info
信息: Precision/recall/fall-out: 0.25 / 0.5 / 0.25
0.25
0.5