mahout learning 代码示例

mahout learning 代码示例

http://www.cnblogs.com/jerome-rong/archive/2012/05/22/2512947.html


一, Introduction


package mia.recommender.ch02;
// 从下面导入的包可以看出:Mahout 的包分为核心接口(model、neighborhood、recommender、similarity)以及它们对应的实现类(impl.*)
import org.apache.mahout.cf.taste.impl.model.file.*;
import org.apache.mahout.cf.taste.impl.neighborhood.*;
import org.apache.mahout.cf.taste.impl.recommender.*;
import org.apache.mahout.cf.taste.impl.similarity.*;
import org.apache.mahout.cf.taste.model.*;
import org.apache.mahout.cf.taste.neighborhood.*;
import org.apache.mahout.cf.taste.recommender.*;
import org.apache.mahout.cf.taste.similarity.*;
import java.io.*;
import java.util.*;

/**
 * Smallest possible user-based recommender demo: reads ratings from
 * {@code intro.csv}, builds a recommender and prints what it suggests
 * for user 1.
 */
class RecommenderIntro {

  /**
   * Assembles a generic user-based recommender over the given ratings.
   *
   * <p>User-based CF starts by finding similar users, so a similarity
   * metric (Pearson correlation) and a neighbourhood (the 2 nearest users)
   * are chosen first and then handed to the recommender.
   */
  private static Recommender userBasedRecommenderFor(DataModel ratings) throws Exception {
    UserSimilarity pearson = new PearsonCorrelationSimilarity(ratings);
    UserNeighborhood nearestTwo = new NearestNUserNeighborhood(2, pearson, ratings);
    return new GenericUserBasedRecommender(ratings, nearestTwo, pearson);
  }

  public static void main(String[] args) throws Exception {
    // Build Mahout's data representation from a CSV file.
    DataModel ratings = new FileDataModel(new File("intro.csv"));
    Recommender engine = userBasedRecommenderFor(ratings);

    // Ask for 1 item for user 1 and print every returned entry.
    // The original author reports the output
    // RecommendedItem[item:104, value:4.257081] for their data file.
    Iterator<RecommendedItem> suggestions = engine.recommend(1, 1).iterator();
    while (suggestions.hasNext()) {
      System.out.println(suggestions.next());
    }
  }
}

二, Evaluation


package mia.recommender.ch02;

import org.apache.mahout.cf.taste.impl.model.file.*;
import org.apache.mahout.cf.taste.impl.neighborhood.*;
import org.apache.mahout.cf.taste.impl.recommender.*;
import org.apache.mahout.cf.taste.impl.similarity.*;
import org.apache.mahout.cf.taste.model.*;
import org.apache.mahout.cf.taste.neighborhood.*;
import org.apache.mahout.cf.taste.recommender.*;
import org.apache.mahout.cf.taste.similarity.*;
import java.io.*;
import java.util.*;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.common.RandomUtils;

/**
 * Evaluates a user-based recommender built on {@code intro.csv} with the
 * average-absolute-difference evaluator and prints the resulting score.
 *
 * @author wentingtu <wentingtu09 at gmail dot com>
 */
public class RecommenderEvalu
{
    /** Fourth argument of {@code evaluate}: the training share of the data, per the original notes. */
    private static final double TRAINING_SHARE = 0.7;

    /** Fifth argument of {@code evaluate}: the share of the data used for the evaluation, per the original notes. */
    private static final double EVALUATION_SHARE = 1.0;

    public static void main(String[] args) throws IOException, TasteException
    {
        // Fix the random seed so that the train/test split is repeatable.
        RandomUtils.useTestSeed();

        DataModel ratings = new FileDataModel(new File("intro.csv"));

        // Metric described by the original author as
        // sum(|predicted - actual|) / number of values.
        RecommenderEvaluator absoluteDifference =
                new AverageAbsoluteDifferenceRecommenderEvaluator();

        // The evaluator builds its own recommender instances, so it is given
        // a recipe (RecommenderBuilder) rather than a ready-made recommender.
        RecommenderBuilder userBasedRecipe = new RecommenderBuilder()
        {
            @Override
            public Recommender buildRecommender(DataModel model)
                    throws TasteException
            {
                UserSimilarity pearson = new PearsonCorrelationSimilarity(model);
                return new GenericUserBasedRecommender(
                        model,
                        new NearestNUserNeighborhood(2, pearson, model),
                        pearson);
            }
        };

        // The original author reports 1.0 as the printed result for their data.
        System.out.println(absoluteDifference.evaluate(
                userBasedRecipe, null, ratings, TRAINING_SHARE, EVALUATION_SHARE));
    }
}
复制代码

package mia.recommender.ch02;

import org.apache.mahout.cf.taste.impl.model.file.*;
import org.apache.mahout.cf.taste.impl.neighborhood.*;
import org.apache.mahout.cf.taste.impl.recommender.*;
import org.apache.mahout.cf.taste.impl.similarity.*;
import org.apache.mahout.cf.taste.model.*;
import org.apache.mahout.cf.taste.neighborhood.*;
import org.apache.mahout.cf.taste.recommender.*;
import org.apache.mahout.cf.taste.similarity.*;
import java.io.*;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.IRStatistics;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderIRStatsEvaluator;
import org.apache.mahout.cf.taste.impl.eval.GenericRecommenderIRStatsEvaluator;
import org.apache.mahout.common.RandomUtils;

/**
 * Computes precision and recall "at 2" for a user-based recommender built on
 * {@code intro.csv} and prints both figures.
 *
 * @author Administrator
 */
public class RecommenderEvaluPrecisionRecall {

    /** Number of top recommendations the statistics are computed over ("precision and recall at 2"). */
    private static final int AT = 2;

    public static void main(String[] args) throws IOException, TasteException {
        RandomUtils.useTestSeed();
        DataModel ratings = new FileDataModel(new File("intro.csv"));

        RecommenderIRStatsEvaluator irEvaluator = new GenericRecommenderIRStatsEvaluator();

        RecommenderBuilder userBasedRecipe = new RecommenderBuilder() {
            @Override
            public Recommender buildRecommender(DataModel model)
                    throws TasteException {
                UserSimilarity pearson = new PearsonCorrelationSimilarity(model);
                return new GenericUserBasedRecommender(
                        model,
                        new NearestNUserNeighborhood(2, pearson, model),
                        pearson);
            }
        };

        // Precision/recall need a definition of a "good" recommendation.
        // According to the original notes, CHOOSE_THRESHOLD makes the
        // evaluator pick a per-user threshold of mean preference plus one
        // standard deviation; an item whose true rating exceeds it counts
        // as good.
        IRStatistics irStats = irEvaluator.evaluate(
                userBasedRecipe,
                null,
                ratings,
                null,
                AT,
                GenericRecommenderIRStatsEvaluator.CHOOSE_THRESHOLD,
                1.0);

        // The original author reports 0.75 and 1.0 for their data.
        double precision = irStats.getPrecision();
        System.out.println(precision);
        double recall = irStats.getRecall();
        System.out.println(recall);
    }
}
复制代码

三,Set preference

复制代码
package mia.recommender.ch03;

import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.PreferenceArray;

/**
 * Shows how to fill a {@code PreferenceArray} by hand: two preferences of
 * user 1, for items 101 and 102.
 *
 * @author Administrator
 */
public class SetPrefinPreferenceArray {

    /**
     * @param args the command line arguments (unused)
     */
    public static void main(String[] args) {
        final int firstSlot = 0;
        final int secondSlot = 1;

        PreferenceArray prefsOfUserOne = new GenericUserPreferenceArray(2);

        // Only slot 0 gets a user ID here.
        // NOTE(review): presumably the user ID applies to the whole array - confirm.
        prefsOfUserOne.setUserID(firstSlot, 1L);

        prefsOfUserOne.setItemID(firstSlot, 101L);
        prefsOfUserOne.setValue(firstSlot, 2.0f);

        prefsOfUserOne.setItemID(secondSlot, 102L);
        prefsOfUserOne.setValue(secondSlot, 3.0f);

        // Read one entry back as a standalone Preference object.
        Preference secondPreference = prefsOfUserOne.get(secondSlot);
    }
}
复制代码

四,User-based CF

复制代码
package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.common.RandomUtils;

/**
 * Evaluates a user-based collaborative-filtering recommender whose
 * similarity metric and neighbourhood type are selected by the experiment
 * settings inside the builder.
 *
 * @author Administrator
 */
public class UserBasedCF {

    /**
     * Builds a user-based recommender according to the experiment settings,
     * scores it with the average-absolute-difference evaluator and prints
     * the score.
     *
     * @param model the preference data to evaluate on
     * @throws TasteException if building or evaluating the recommender fails
     */
    public static void recommenderModelEvaluation(DataModel model) throws TasteException {

        RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();
        // Fixed seed so the train/test split is repeatable between runs.
        RandomUtils.useTestSeed();

        RecommenderBuilder builder = new RecommenderBuilder() {
            // ============= experiment settings =============
            // Similarity metric: 'E' Euclidean, 'P' Pearson,
            // 'L' log-likelihood, 'T' Tanimoto.
            private final char similarityPattern = 'E';
            // Neighbourhood: 'K' k-nearest users, 'T' similarity threshold.
            private final char neighborhoodPattern = 'K';
            // Neighbourhood size, used when neighborhoodPattern == 'K'.
            private final int k = 2;
            // Similarity threshold, used when neighborhoodPattern == 'T'.
            private final double threshold = 0.5;

            @Override
            public Recommender buildRecommender(DataModel dm) throws TasteException {
                // Every case ends with break: without it the switch falls
                // through and the last case silently wins, whatever the
                // configured pattern is.
                UserSimilarity similarity;
                switch (similarityPattern) {
                    case 'E':
                        similarity = new EuclideanDistanceSimilarity(dm);
                        break;
                    case 'P':
                        similarity = new PearsonCorrelationSimilarity(dm);
                        break;
                    case 'L':
                        similarity = new LogLikelihoodSimilarity(dm);
                        break;
                    case 'T':
                        similarity = new TanimotoCoefficientSimilarity(dm);
                        break;
                    default:
                        throw new IllegalStateException(
                                "Unknown similarity pattern: " + similarityPattern);
                }

                UserNeighborhood neighborhood;
                switch (neighborhoodPattern) {
                    case 'K':
                        neighborhood = new NearestNUserNeighborhood(k, similarity, dm);
                        break;
                    case 'T':
                        neighborhood = new ThresholdUserNeighborhood(threshold, similarity, dm);
                        break;
                    default:
                        throw new IllegalStateException(
                                "Unknown neighborhood pattern: " + neighborhoodPattern);
                }
                return new GenericUserBasedRecommender(dm, neighborhood, similarity);
            }
        };

        double score = evaluator.evaluate(builder, null, model, 0.7, 1.0);
        System.out.println(score);
    }

    /**
     * Loads {@code data/dating/ratings.dat} and runs the evaluation.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String[] args) throws IOException, TasteException {
        DataModel model = new FileDataModel(new File("data/dating/ratings.dat"));
        recommenderModelEvaluation(model);
    }
}
复制代码

五,Item-based CF

复制代码

package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.apache.mahout.common.RandomUtils;

/**
 * Evaluates an item-based collaborative-filtering recommender whose
 * similarity metric is selected by the experiment setting inside the builder.
 *
 * @author Administrator
 */
public class ItemBasedCF {

    /**
     * Builds an item-based recommender with the configured similarity,
     * scores it with the average-absolute-difference evaluator and prints
     * the score.
     *
     * @param model the preference data to evaluate on
     * @throws TasteException if building or evaluating the recommender fails
     */
    public static void recommenderModelEvaluation(DataModel model) throws TasteException {

        RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();
        // Fixed seed so the train/test split is repeatable between runs.
        RandomUtils.useTestSeed();

        RecommenderBuilder builder = new RecommenderBuilder() {
            // ============= experiment settings =============
            // Similarity metric: 'E' Euclidean, 'P' Pearson,
            // 'L' log-likelihood, 'T' Tanimoto.
            private final char similarityPattern = 'E';

            @Override
            public Recommender buildRecommender(DataModel dm) throws TasteException {
                // Every case ends with break: without it the switch falls
                // through and Tanimoto is always the metric that ends up
                // being used.
                ItemSimilarity similarity;
                switch (similarityPattern) {
                    case 'E':
                        similarity = new EuclideanDistanceSimilarity(dm);
                        break;
                    case 'P':
                        similarity = new PearsonCorrelationSimilarity(dm);
                        break;
                    case 'L':
                        similarity = new LogLikelihoodSimilarity(dm);
                        break;
                    case 'T':
                        similarity = new TanimotoCoefficientSimilarity(dm);
                        break;
                    default:
                        throw new IllegalStateException(
                                "Unknown similarity pattern: " + similarityPattern);
                }

                return new GenericItemBasedRecommender(dm, similarity);
            }
        };

        double score = evaluator.evaluate(builder, null, model, 0.7, 1.0);
        System.out.println(score);
    }

    /**
     * Loads {@code data/dating/ratings.dat} and runs the evaluation.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String[] args) throws IOException, TasteException {
        DataModel model = new FileDataModel(new File("data/dating/ratings.dat"));
        recommenderModelEvaluation(model);
    }
}
复制代码


六,Slope one CF

复制代码

package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.common.Weighting;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.recommender.slopeone.MemoryDiffStorage;
import org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.recommender.slopeone.DiffStorage;
import org.apache.mahout.common.RandomUtils;

/**
 * Evaluates a slope-one recommender backed by an in-memory diff storage.
 *
 * @author Administrator
 */
public class SlopeOneCF {

    /**
     * Builds a weighted slope-one recommender, scores it with the
     * average-absolute-difference evaluator and prints the score.
     *
     * @param model the preference data to evaluate on
     * @throws TasteException if building or evaluating the recommender fails
     */
    public static void recommenderModelEvaluation(DataModel model) throws TasteException {
        RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();
        // Fixed seed so the train/test split is repeatable between runs.
        RandomUtils.useTestSeed();
        RecommenderBuilder builder = new RecommenderBuilder() {

            // Third argument of MemoryDiffStorage.
            // NOTE(review): presumably the maximum number of stored diffs - confirm.
            private final long diffStorageNb = 100000;

            @Override
            public Recommender buildRecommender(DataModel dm) throws TasteException {
                DiffStorage diffStorage = new MemoryDiffStorage(dm, Weighting.WEIGHTED, diffStorageNb);
                return new SlopeOneRecommender(dm, Weighting.WEIGHTED, Weighting.WEIGHTED, diffStorage);
            }
        };

        // Previously the builder was created but never evaluated, so the
        // method produced no result. Use the same 0.7 / 1.0 split as the
        // user-based and item-based examples so the scores are comparable.
        double score = evaluator.evaluate(builder, null, model, 0.7, 1.0);
        System.out.println(score);
    }

    /**
     * Loads {@code data/dating/ratings.dat} and runs the evaluation.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String[] args) throws IOException, TasteException {
        DataModel model = new FileDataModel(new File("data/dating/ratings.dat"));
        recommenderModelEvaluation(model);
    }
}
复制代码

七,一个示例

复制代码

package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
import org.apache.mahout.cf.taste.impl.model.PlusAnonymousUserDataModel;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;

/**
 * Extension of {@link LibimsetiRecommender} that can recommend for an
 * anonymous user, i.e. one whose preferences are supplied with the request
 * instead of being stored in the data file.
 *
 * @author Administrator
 */
public class LibimsetiWithAnonymousRecommender extends LibimsetiRecommender {

    private final PlusAnonymousUserDataModel plusAnonymousModel;

    /**
     * Creates the recommender over {@code data/dating/ratings.dat}.
     */
    public LibimsetiWithAnonymousRecommender()
            throws TasteException, IOException {
        this((DataModel) new FileDataModel(new File("data/dating/ratings.dat")));
    }

    /**
     * Creates the recommender over the given model, wrapped in a
     * {@link PlusAnonymousUserDataModel}.
     *
     * @param model the underlying preference data
     */
    public LibimsetiWithAnonymousRecommender(DataModel model)
            throws TasteException, IOException {
        // Hand the wrapped model to the parent constructor ...
        super(new PlusAnonymousUserDataModel(model));
        // ... and fetch the wrapper back so temporary preferences can be set on it.
        plusAnonymousModel =
                (PlusAnonymousUserDataModel) getDataModel();
    }

    /**
     * Recommends items for an anonymous user.
     *
     * <p>The preferences are installed on the wrapper model for the duration
     * of the call and looked up under
     * {@link PlusAnonymousUserDataModel#TEMP_USER_ID}. The method is
     * {@code synchronized} because the wrapper holds one set of temporary
     * preferences at a time.
     *
     * @param anonymousUserPrefs the anonymous user's ratings
     * @param topN               how many recommendations to ask for
     * @return the recommendations for the anonymous user
     * @throws TasteException if the underlying recommender fails
     */
    public synchronized List<RecommendedItem> recommend(
            PreferenceArray anonymousUserPrefs, int topN)
            throws TasteException {
        plusAnonymousModel.setTempPrefs(anonymousUserPrefs);
        try {
            // Delegates to the parent's three-argument recommend with no rescorer.
            return recommend(PlusAnonymousUserDataModel.TEMP_USER_ID, topN, null);
        } finally {
            // Always detach the temporary preferences, even when recommending
            // throws; otherwise they would leak into the next request.
            plusAnonymousModel.clearTempPrefs();
        }
    }

    /**
     * Creates sample preferences for an anonymous user.
     *
     * @return a three-entry preference array for {@code TEMP_USER_ID}
     */
    public PreferenceArray creatAnAnonymousPrefs() {
        PreferenceArray anonymousPrefs =
                new GenericUserPreferenceArray(3);
        anonymousPrefs.setUserID(0, PlusAnonymousUserDataModel.TEMP_USER_ID);
        // NOTE(review): all three entries rate the same item ID (123) with
        // different values; distinct item IDs were presumably intended.
        // Left unchanged because the intended IDs are not known - confirm.
        anonymousPrefs.setItemID(0, 123L);
        anonymousPrefs.setValue(0, 1.0f);
        anonymousPrefs.setItemID(1, 123L);
        anonymousPrefs.setValue(1, 3.0f);
        anonymousPrefs.setItemID(2, 123L);
        anonymousPrefs.setValue(2, 2.0f);
        return anonymousPrefs;
    }

    public static void main(String[] args) throws Exception {

        LibimsetiWithAnonymousRecommender recommender =
                new LibimsetiWithAnonymousRecommender();
        List<RecommendedItem> recommendations =
                recommender.recommend(recommender.creatAnAnonymousPrefs(), 10);
        System.out.println(recommendations);
    }
}

复制代码

 

package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

/**
 * Recommender for the dating data set: a plain user-based CF recommender
 * whose default recommendations and estimates pass through a
 * {@link GenderRescorer}.
 *
 * @author Administrator
 */
public class LibimsetiRecommender implements Recommender {

    /** The underlying pure-CF recommender every call is forwarded to. */
    private final Recommender delegate;
    /** The model this recommender was constructed with. */
    private final DataModel ratings;
    /** Profile IDs read from the gender file: index 0 of the parsed pair. */
    private final FastIDSet maleProfiles;
    /** Profile IDs read from the gender file: index 1 of the parsed pair. */
    private final FastIDSet femaleProfiles;

    /**
     * Creates the recommender over {@code data/dating/ratings.dat}.
     */
    public LibimsetiRecommender() throws TasteException, IOException {
        this((DataModel) new FileDataModel(new File("data/dating/ratings.dat")));
    }

    /**
     * Creates the recommender over the given model. The pure-CF part
     * (Euclidean similarity, 2 nearest users) is built first, then the
     * gender information is loaded from {@code gender.dat}.
     *
     * @param model the preference data
     */
    public LibimsetiRecommender(DataModel model) throws TasteException, IOException {
        UserSimilarity euclidean = new EuclideanDistanceSimilarity(model);
        UserNeighborhood nearestTwo = new NearestNUserNeighborhood(2, euclidean, model);
        this.delegate = new GenericUserBasedRecommender(model, nearestTwo, euclidean);
        this.ratings = model;

        File genderFile = new File(("gender.dat"));
        FastIDSet[] byGender = GenderRescorer.generateMenWomen(genderFile);
        this.maleProfiles = byGender[0];
        this.femaleProfiles = byGender[1];
    }

    /** Recommends with a {@link GenderRescorer} built for the requesting user. */
    public List<RecommendedItem> recommend(long userID, int topN) throws TasteException {
        IDRescorer genderFilter = newGenderRescorer(userID);
        return delegate.recommend(userID, topN, genderFilter);
    }

    /** Recommends with a caller-supplied rescorer instead of the gender one. */
    public List<RecommendedItem> recommend(long userID, int topN, IDRescorer idr) throws TasteException {
        return delegate.recommend(userID, topN, idr);
    }

    /**
     * Estimates a preference and passes the estimate through the gender
     * rescorer of the given user before returning it.
     */
    public float estimatePreference(long userID, long itemID) throws TasteException {
        IDRescorer genderFilter = newGenderRescorer(userID);
        float rawEstimate = delegate.estimatePreference(userID, itemID);
        double rescored = genderFilter.rescore(itemID, rawEstimate);
        return (float) rescored;
    }

    /** Forwarded unchanged to the underlying recommender. */
    public void setPreference(long userID, long itemID, float value) throws TasteException {
        delegate.setPreference(userID, itemID, value);
    }

    /** Forwarded unchanged to the underlying recommender. */
    public void removePreference(long userID, long itemID) throws TasteException {
        delegate.removePreference(userID, itemID);
    }

    /** Returns the data model reported by the underlying recommender. */
    public DataModel getDataModel() {
        return delegate.getDataModel();
    }

    /** Forwarded unchanged to the underlying recommender. */
    public void refresh(Collection<Refreshable> alreadyRefreshed) {
        delegate.refresh(alreadyRefreshed);
    }

    /** Builds the gender rescorer for one user from the loaded gender sets. */
    private IDRescorer newGenderRescorer(long userID) throws TasteException {
        return new GenderRescorer(maleProfiles, femaleProfiles, userID, ratings);
    }
}

复制代码
package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.common.iterator.FileLineIterable;

/**
 * Rescorer that filters recommended profiles by gender: for a user who
 * mostly rated men, female profiles are filtered, and vice versa.
 *
 * @author Administrator
 */
public class GenderRescorer implements IDRescorer {

    /** IDs of all male profiles. */
    private final FastIDSet men;
    /** IDs of all female profiles. */
    private final FastIDSet women;
    /** Cache of user IDs already found to rate more men than women. */
    private final FastIDSet usersRateMoreMen;
    /** Cache of user IDs already found NOT to rate more men than women. */
    private final FastIDSet usersRateLessMen;
    /** Whether the user this rescorer was built for rates more men than women. */
    private final boolean likeMen;

    /**
     * Creates a rescorer with private, initially empty caches. The user's
     * ratings are always scanned once by this constructor.
     *
     * @param men    IDs of male profiles
     * @param women  IDs of female profiles
     * @param userID the user recommendations are made for
     * @param model  source of the user's ratings
     * @throws TasteException if the user's preferences cannot be read
     */
    public GenderRescorer(
            FastIDSet men,
            FastIDSet women,
            long userID, DataModel model)
            throws TasteException {
        this(men, women, new FastIDSet(), new FastIDSet(), userID, model);
    }

    /**
     * Creates a rescorer that consults and updates caller-owned caches, so
     * the scan of a user's ratings can be skipped when another rescorer has
     * already classified that user.
     *
     * <p>NOTE(review): the caches are mutated without synchronization here;
     * callers sharing them across threads presumably need to guard them -
     * confirm.
     *
     * @param men              IDs of male profiles
     * @param women            IDs of female profiles
     * @param usersRateMoreMen cache of users known to rate more men
     * @param usersRateLessMen cache of users known not to rate more men
     * @param userID           the user recommendations are made for
     * @param model            source of the user's ratings
     * @throws TasteException if the user's preferences cannot be read
     */
    public GenderRescorer(
            FastIDSet men,
            FastIDSet women,
            FastIDSet usersRateMoreMen,
            FastIDSet usersRateLessMen,
            long userID, DataModel model)
            throws TasteException {
        this.men = men;
        this.women = women;
        this.usersRateMoreMen = usersRateMoreMen;
        this.usersRateLessMen = usersRateLessMen;
        this.likeMen = ratesMoreMen(userID, model);
    }

    /**
     * Reads a gender file with one {@code <profileID>,<gender>} entry per
     * line. Gender {@code U} is skipped, {@code M} goes to the men set and
     * any other value goes to the women set. Blank lines are ignored.
     *
     * @param genderFile the file to parse
     * @return a two-element array: index 0 the men set, index 1 the women set
     * @throws IOException if the file cannot be read or a non-blank line
     *                     lacks the profile ID or the gender field
     */
    public static FastIDSet[] generateMenWomen(File genderFile)
            throws IOException {
        FastIDSet men = new FastIDSet(50000);
        FastIDSet women = new FastIDSet(50000);
        for (String line : new FileLineIterable(genderFile)) {
            if (line.trim().length() == 0) {
                // Tolerate blank lines instead of failing inside charAt().
                continue;
            }
            int comma = line.indexOf(',');
            if (comma <= 0 || comma + 1 >= line.length()) {
                // Fail with the offending line rather than with a bare
                // StringIndexOutOfBoundsException.
                throw new IOException(
                        "Malformed gender line (expected <profileID>,<gender>): " + line);
            }
            char gender = line.charAt(comma + 1);
            if (gender == 'U') {
                continue;
            }
            long profileID = Long.parseLong(line.substring(0, comma));
            if (gender == 'M') {
                men.add(profileID);
            } else {
                women.add(profileID);
            }
        }
        men.rehash();
        women.rehash();
        return new FastIDSet[]{men, women};
    }

    /**
     * Decides whether the user rates more men than women by counting the
     * genders of the profiles the user has rated. A tie counts as "not more
     * men". The result is recorded in the caches.
     */
    private boolean ratesMoreMen(long userID, DataModel model)
            throws TasteException {
        if (usersRateMoreMen.contains(userID)) {
            return true;
        }
        if (usersRateLessMen.contains(userID)) {
            return false;
        }
        PreferenceArray prefs = model.getPreferencesFromUser(userID);
        int menCount = 0;
        int womenCount = 0;
        for (int i = 0; i < prefs.length(); i++) {
            long profileID = prefs.get(i).getItemID();
            if (men.contains(profileID)) {
                menCount++;
            } else if (women.contains(profileID)) {
                womenCount++;
            }
        }
        boolean ratesMoreMen = menCount > womenCount;
        if (ratesMoreMen) {
            usersRateMoreMen.add(userID);
        } else {
            usersRateLessMen.add(userID);
        }
        return ratesMoreMen;
    }

    /**
     * Returns {@code NaN} for a filtered profile and the unchanged score
     * otherwise.
     */
    public double rescore(long profileID, double originalScore) {
        return isFiltered(profileID) ? Double.NaN : originalScore;
    }

    /**
     * A profile is filtered when its gender is the opposite of the one the
     * user mostly rates. Profiles in neither set are never filtered.
     */
    public boolean isFiltered(long profileID) {
        return likeMen ? women.contains(profileID) : men.contains(profileID);
    }
}

复制代码
package mia.recommender.ch05;

import java.util.Collection;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;

/**
 * Item similarity based only on gender: two profiles of the same gender are
 * maximally similar, profiles of different genders maximally dissimilar,
 * and a profile of unknown gender has similarity 0 to everything.
 *
 * @author Administrator
 */
public class GenderItemSimilarity  implements ItemSimilarity  {
  private final FastIDSet men;
  private final FastIDSet women;

  /**
   * @param men   IDs of male profiles
   * @param women IDs of female profiles
   */
  public GenderItemSimilarity(FastIDSet men, FastIDSet women) {
    this.men = men;
    this.women = women;
  }

  /**
   * @return 1.0 when both profiles have the same known gender, -1.0 when
   *         the genders differ, 0.0 when either gender is unknown
   */
  public double itemSimilarity(long profileID1, long profileID2) throws TasteException {
    Boolean profile1IsMan = isMan(profileID1);
    if (profile1IsMan == null) {
      return 0.0;
    }
    Boolean profile2IsMan = isMan(profileID2);
    if (profile2IsMan == null) {
      return 0.0;
    }
    // Compare the boolean values, not the Boolean references.
    return profile1IsMan.booleanValue() == profile2IsMan.booleanValue() ? 1.0 : -1.0;
  }

  /**
   * @return TRUE for a male profile, FALSE for a female one, {@code null}
   *         when the profile is in neither set
   */
  private Boolean isMan(long profileID) {
    if (men.contains(profileID)) {
      return Boolean.TRUE;
    }
    if (women.contains(profileID)) {
      return Boolean.FALSE;
    }
    return null;
  }

  /** Computes {@link #itemSimilarity(long, long)} of one item against each of the others. */
  public double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException {
    double[] result = new double[itemID2s.length];
    for (int i = 0; i < itemID2s.length; i++) {
      result[i] = itemSimilarity(itemID1, itemID2s[i]);
    }
    return result;
  }

  /** Not implemented by this similarity. */
  public long[] allSimilarItemIDs(long l) throws TasteException {
    throw new UnsupportedOperationException("Not supported yet.");
  }

  /**
   * Does nothing: the gender sets are supplied once at construction, so
   * there is no derived state to recompute. This used to throw, which would
   * make refreshing any recommender that forwards the call to this
   * similarity fail.
   */
  public void refresh(Collection<Refreshable> clctn) {
    // Intentionally empty.
  }

}
复制代码

你可能感兴趣的:(hadoop)