Mahout学习笔记(一)之collaborative filtering

collaborative filtering的思想很简单:人们往往喜欢与自己相似的人所喜欢的东西(人以群分,user-based);人们也往往喜欢与自己已经喜欢的东西相似的东西(物以类聚,item-based)。

package MahoutStudy.inaction;

import java.io.File;
import java.io.IOException;
import java.util.List;

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.IRStatistics;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.eval.RecommenderIRStatsEvaluator;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.GenericRecommenderIRStatsEvaluator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.common.RandomUtils;

class RecommenderIntro_1 {

	static void testIR() throws Exception {
		RandomUtils.useTestSeed();
		DataModel model = new FileDataModel(new File("intro.csv"));
		RecommenderIRStatsEvaluator evaluator = new GenericRecommenderIRStatsEvaluator();
		RecommenderBuilder recommenderBuilder = new RecommenderBuilder() {
			public Recommender buildRecommender(DataModel model) throws TasteException {
				UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
				UserNeighborhood neighborhood = new NearestNUserNeighborhood(2, similarity, model);
				return new GenericUserBasedRecommender(model, neighborhood, similarity);
			}
		};
		IRStatistics stats = evaluator.evaluate(recommenderBuilder, null, model, null, 2,
				GenericRecommenderIRStatsEvaluator.CHOOSE_THRESHOLD, 1.0);// CHOOSE_THRESHOLD用来区分什么是good
																			// recommendation,mahout内部使用平均值+标准差
		System.out.println(stats.getPrecision());
		System.out.println(stats.getRecall());
	}

	static void testEvaluate() throws IOException, TasteException {
		RandomUtils.useTestSeed();

		DataModel model = new FileDataModel(new File("intro.csv"));

		RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();

		// RecommenderEvaluator evaluator = new RMSRecommenderEvaluator();

		RecommenderBuilder builder = new RecommenderBuilder() {
			public Recommender buildRecommender(DataModel model) throws TasteException {
				UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
				UserNeighborhood neighborhood = new NearestNUserNeighborhood(2, similarity, model);
				return new GenericUserBasedRecommender(model, neighborhood, similarity);

				// return new SlopeOneRecommender(model);
			}
		};

		double score = evaluator.evaluate(builder, null, model, 0.8, 1.0);
		System.out.println(score);
	}

	static void test() throws Exception {
		DataModel model = new FileDataModel(new File("intro.csv"));
		UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
		UserNeighborhood neighborhood = new NearestNUserNeighborhood(2, similarity, model);
		Recommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);
		List<RecommendedItem> recommendations = recommender.recommend(1, 3);
		for (RecommendedItem recommendation : recommendations) {
			System.out.println(recommendation);
		}
	}

	public static void main(String[] args) throws Exception {

		test();

		testEvaluate();

		testIR();

	}
}

你可能感兴趣的:(hadoop,Mahout,Data,learning,machine,Mining,Collaborative,filtering)