Hadoop2.3.0上部署Mahout0.10,并测试单机版与分布式版个性化推荐程序
1 Eclipse中Hadoop2.3.0及Mahout0.10相关jar包部署
Hadoop2以上需要使用Mahout0.10以上版本才可以直接运行,否则需要重新编译Mahout相关jar包。本文直接使用Mahout0.10版本,执行前在Eclipse中分别导入Hadoop2.3.0和Mahout0.10相关jar包即可。Eclipse中Hadoop2.3.0 jar包部署见上篇文章:eclipse中hadoop2.3.0环境部署及在eclipse中直接提交mapreduce任务,Eclipse中Mahout0.10 jar包部署如下图所示:
2 单机版个性化推荐源码
import java.io.File; import java.io.IOException; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import org.apache.mahout.cf.taste.common.TasteException; import org.apache.mahout.cf.taste.eval.IRStatistics; import org.apache.mahout.cf.taste.eval.RecommenderBuilder; import org.apache.mahout.cf.taste.eval.RecommenderIRStatsEvaluator; import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; import org.apache.mahout.cf.taste.impl.eval.GenericRecommenderIRStatsEvaluator; import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood; import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity; import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; import org.apache.mahout.cf.taste.model.DataModel; import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; import org.apache.mahout.cf.taste.recommender.RecommendedItem; import org.apache.mahout.cf.taste.recommender.Recommender; import org.apache.mahout.cf.taste.similarity.UserSimilarity; /** * 产品推荐单机运行模式 * * @author hadoop * */ // 用户id 产品id 评分 // 1,101,5.0 // 1,102,3.0 // 1,103,2.5 // 2,101,2.0 // 2,102,2.5 // 2,103,5.0 // 2,104,2.0 // 3,101,2.5 // 3,104,4.0 // 3,105,4.5 // 3,107,5.0 // 4,101,5.0 // 4,103,3.0 // 4,104,4.5 // 4,106,4.0 // 5,101,4.0 // 5,102,3.0 // 5,103,2.0 // 5,104,4.0 // 5,105,3.5 // 5,106,4.0 public class UserCF { final static int NEIGHBORHOOD_NUM = 2;// 和相邻多少个用户进行关联求相似度 final static int RECOMMENDER_NUM = 3;// 每个用户推荐产品的数量 /** * @description DataModel负责存储和提供用户、项目、偏好的计算所需要的数据 * UserSimiliarity提供了一些基于某种算法的用户相似度度量的方法 * UserNeighborhood定义了一个和某指定用户相似的用户集合 * Recommender利用所有的组件来为一个用户产生一个推荐结果,另外他也提供了一系列的相关方法 * @param args * @throws IOException * @throws TasteException */ public static void main(String[] args) throws 
IOException, TasteException { String file = "E:/hadoop/mahout0.9_1jars/mahout_in1.txt";// 数据文件路径,可以是压缩文件 DataModel model = new FileDataModel(new File(file));// 加载数据 UserSimilarity user = new EuclideanDistanceSimilarity(model);// 计算用户相似度,权重值为(0,1] NearestNUserNeighborhood neighbor = new NearestNUserNeighborhood( NEIGHBORHOOD_NUM, user, model);// 寻找相似用户 Recommender r = new GenericUserBasedRecommender(model, neighbor, user); LongPrimitiveIterator iter = model.getUserIDs(); while (iter.hasNext()) { long uid = iter.nextLong(); List<RecommendedItem> list = r.recommend(uid, RECOMMENDER_NUM); System.out.printf("uid:%s", uid); for (RecommendedItem ritem : list) { System.out.printf("(%s,%f)", ritem.getItemID(), ritem.getValue()); } System.out.println(); } /** * 推荐结果评估 */ RecommenderIRStatsEvaluator evaluator = new GenericRecommenderIRStatsEvaluator(); RecommenderBuilder recommenderBuilder = new RecommenderBuilder() { @Override public Recommender buildRecommender(DataModel model) throws TasteException { UserSimilarity similarity = new PearsonCorrelationSimilarity( model); UserNeighborhood neighborhood = new NearestNUserNeighborhood(2, similarity, model); return new GenericUserBasedRecommender(model, neighborhood, similarity); } }; IRStatistics stats = evaluator.evaluate(recommenderBuilder, null, model, null, 2, GenericRecommenderIRStatsEvaluator.CHOOSE_THRESHOLD, 1.0); System.out.println("查准率: " + stats.getPrecision());//查准率 System.out.println("召回率: " + stats.getRecall());//召回率 } }
运行结果:
uid:1(104,4.274336)(106,4.000000)
uid:2(105,4.055916)
uid:3(103,3.360987)(102,2.773169)
uid:4(102,3.000000)
uid:5
查准率: 0.75
召回率: 1.0
3 分布式版个性化推荐源码
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.ToolRunner; import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob; import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.CityBlockSimilarity; import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.CooccurrenceCountSimilarity; import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.CosineSimilarity; import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.EuclideanDistanceSimilarity; import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.LoglikelihoodSimilarity; import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.CosineSimilarity; import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.TanimotoCoefficientSimilarity; public class MahoutJobTest { public static void main(String args[]) throws Exception{ Configuration conf= new Configuration(); conf.set("fs.default.name", "hdfs://192.168.1.100:9000"); conf.set("hadoop.job.user", "hadoop"); conf.set("mapreduce.framework.name", "yarn"); conf.set("mapreduce.jobtracker.address", "192.168.1.101:9001"); conf.set("yarn.resourcemanager.hostname", "192.168.1.101"); conf.set("yarn.resourcemanager.admin.address", "192.168.1.101:8033"); conf.set("yarn.resourcemanager.address", "192.168.1.101:8032"); conf.set("yarn.resourcemanager.resource-tracker.address", "192.168.1.101:8031"); conf.set("yarn.resourcemanager.scheduler.address", "192.168.1.101:8030"); String[] str ={ "-i","hdfs://192.168.1.100:9000/data/test_in/mahout_in1.csv", "-o","hdfs://192.168.1.100:9000/data/test_out/mahout_out_CityBlockSimilarity/rec001", "-n","3", "-b","false", //mahout自带的相似类列表 // SIMILARITY_COOCCURRENCE(CooccurrenceCountSimilarity.class), // SIMILARITY_LOGLIKELIHOOD(LoglikelihoodSimilarity.class), // SIMILARITY_TANIMOTO_COEFFICIENT(TanimotoCoefficientSimilarity.class), // SIMILARITY_CITY_BLOCK(CityBlockSimilarity.class), // SIMILARITY_COSINE(CityBlockSimilarity.class), // 
SIMILARITY_PEARSON_CORRELATION(CosineSimilarity.class), // SIMILARITY_EUCLIDEAN_DISTANCE(EuclideanDistanceSimilarity.class); "-s","SIMILARITY_CITY_BLOCK", "--maxPrefsPerUser","70", "--minPrefsPerUser","2", "--maxPrefsInItemSimilarity","70", "--outputPathForSimilarityMatrix","hdfs://192.168.1.100:9000/data/test_out/mahout_out_CityBlockSimilarity/matrix/rec001", "--tempDir","hdfs://192.168.1.100:9000/data/test_out/mahout_out_CityBlockSimilarity/temp/rec001" }; ToolRunner.run(conf, new RecommenderJob(), str); } }