Mahout实现基于性别的IDRescorer

<span style="font-size:18px;">/***
 * @author YangXin
 * @info 基于性别的IDRescorer
 * 对于在乎性别的用户,IDRescorer能够对物品或用户档案进行过滤。
 * 首先,可以先通过检查已经评价过的档案的性别,来猜测该用户所偏好
 * 的性别。然后,就可以过滤与之性别相反的档案。
 */
package unitFive;

import java.io.File;
import java.io.IOException;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.common.iterator.FileLineIterable;

/**
 * An {@link IDRescorer} that excludes profiles of one gender from a user's results.
 *
 * <p>The user's preferred gender is guessed from the profiles the user has already
 * rated: if more of them are men than women, men are assumed to be preferred and
 * women are filtered out; otherwise men are filtered out. Profiles that appear in
 * neither gender set are never filtered.
 */
public class GenderRescorer implements IDRescorer {
	/** IDs of the profiles known to be men. */
	private final FastIDSet men;
	/** IDs of the profiles known to be women. */
	private final FastIDSet women;
	/** Caller-supplied cache of user IDs already found to rate more men than women. */
	private final FastIDSet usersRateMoreMen;
	/** Caller-supplied cache of user IDs already found NOT to rate more men than women. */
	private final FastIDSet usersRateLessMen;
	/** {@code true} when men are the gender excluded for this user, {@code false} when women are. */
	private final boolean filterMen;

	/**
	 * Creates a rescorer for a single user.
	 *
	 * @param men IDs of male profiles
	 * @param women IDs of female profiles
	 * @param usersRateMoreMen cache of users known to rate more men; may be added to
	 * @param usersRateLessMen cache of users known not to rate more men; may be added to
	 * @param userID the user whose recommendations will be rescored
	 * @param model data model used to look up the user's existing preferences
	 * @throws TasteException if the user's preferences cannot be read from {@code model}
	 */
	public GenderRescorer(FastIDSet men, FastIDSet women, FastIDSet usersRateMoreMen, FastIDSet usersRateLessMen, long userID, DataModel model) throws TasteException{
		this.men = men;
		this.women = women;
		this.usersRateMoreMen = usersRateMoreMen;
		this.usersRateLessMen = usersRateLessMen;
		// A user who mostly rates men presumably prefers men, so the gender to
		// exclude is the opposite one. The private helper is used here so that
		// no overridable method runs while the object is still being constructed.
		this.filterMen = !computeRatesMoreMen(userID, model);
	}

	/**
	 * Parses a gender file into two sets of profile IDs.
	 *
	 * <p>Each usable line has the form {@code <profileID>,<gender>...}, where only the
	 * first character after the first comma is inspected. {@code 'M'} puts the profile
	 * in the men set, {@code 'U'} skips the line, and any other character puts the
	 * profile in the women set. Lines that are blank, have no comma, have nothing
	 * before the comma, or have nothing after it are skipped.
	 *
	 * @param genderFile the file to read
	 * @return a two-element array: index 0 is the men set, index 1 is the women set
	 * @throws IOException if the file cannot be read
	 * @throws NumberFormatException if a profile ID is not a valid {@code long}
	 */
	public static FastIDSet[] parseMenWomen(File genderFile) throws IOException{
		FastIDSet men = new FastIDSet(50000);
		FastIDSet women = new FastIDSet(50000);
		for(String line : new FileLineIterable(genderFile)){
			int comma = line.indexOf(',');
			// Skip blank or malformed lines instead of failing with an
			// index-out-of-bounds error (e.g. on a trailing empty line).
			if(comma <= 0 || comma + 1 >= line.length()){
				continue;
			}
			char gender = line.charAt(comma + 1);
			if(gender == 'U'){
				// Unknown gender: the profile goes into neither set.
				continue;
			}
			long profileID = Long.parseLong(line.substring(0, comma));
			if(gender == 'M'){
				men.add(profileID);
			}else{
				women.add(profileID);
			}
		}
		// Rehash once loading is complete (presumably resizes the tables to fit
		// the final number of entries -- confirm against FastIDSet docs).
		men.rehash();
		women.rehash();
		return new FastIDSet[]{men, women};
	}

	/**
	 * Reports whether the given user has rated more male than female profiles.
	 *
	 * <p>The answer is taken from the two cache sets when present; otherwise it is
	 * computed from the user's preferences and recorded in the matching cache set.
	 * A tie (including no rated profiles of known gender) yields {@code false}.
	 *
	 * @param userID the user to inspect
	 * @param model data model supplying the user's preferences
	 * @return {@code true} if strictly more rated profiles are men than women
	 * @throws TasteException if the user's preferences cannot be read from {@code model}
	 */
	public boolean ratesMoreMen(long userID, DataModel model) throws TasteException{
		return computeRatesMoreMen(userID, model);
	}

	/** Cache-backed implementation of {@link #ratesMoreMen}; private so the constructor can call it safely. */
	private boolean computeRatesMoreMen(long userID, DataModel model) throws TasteException{
		if(usersRateMoreMen.contains(userID)){
			return true;
		}
		if(usersRateLessMen.contains(userID)){
			return false;
		}
		PreferenceArray prefs = model.getPreferencesFromUser(userID);
		int menCount = 0;
		int womenCount = 0;
		for(int i = 0; i < prefs.length(); i++){
			long profileID = prefs.get(i).getItemID();
			if(men.contains(profileID)){
				menCount++;
			}else if(women.contains(profileID)){
				womenCount++;
			}
		}
		// A user who rates mostly men probably prefers men.
		boolean ratesMoreMen = menCount > womenCount;
		// Remember the result so later rescorers for the same user skip the scan.
		if(ratesMoreMen){
			usersRateMoreMen.add(userID);
		}else{
			usersRateLessMen.add(userID);
		}
		return ratesMoreMen;
	}

	/**
	 * Returns {@link Double#NaN} for profiles that are filtered out and the
	 * unchanged {@code originalScore} for all others.
	 */
	@Override
	public double rescore(long profileID, double originalScore) {
		return isFiltered(profileID) ? Double.NaN : originalScore;
	}

	/**
	 * Returns {@code true} when the profile belongs to the gender being excluded
	 * for this user; profiles in neither gender set are never filtered.
	 */
	@Override
	public boolean isFiltered(long profileID) {
		return filterMen ? men.contains(profileID) : women.contains(profileID);
	}

}
</span>

你可能感兴趣的:(Mahout实现基于性别的IDRescorer)