日撸代码300行:第54天(基于 M-distance 的推荐)

代码来自闵老师“日撸 Java 三百行(51-60天)”,链接:日撸 Java 三百行(51-60天,kNN 与 NB)_闵帆的博客-CSDN博客

算法是基于M-distance的推荐,通过用户评分矩阵对用户进行电影推荐。论文为Mei Zheng, Fan Min, Heng-Ru Zhang, Wen-Bin Chen, Fast recommendations with the M-distance, IEEE Access 4 (2016) 1464–1468。论文可点击进行下载。数据集也可以通过闵老师原文的链接进行下载。数据集的名称为movielens-943u1682m.txt.

代码在基于条目推荐的时候使用了一个技巧,降低了算法的复杂度。因为数据是按照用户顺序存储的,所以同一用户的评分在数组中是连续的,范围从userStartingIndices[tempUser]开始,到userStartingIndices[tempUser + 1]结束(不包含后者)。这样在推荐计算的时候,就不需要遍历整个数据集,只需遍历该用户对应的那几条数据。

通过今天的代码又学到了一点儿,算法的实际思路不一定与底层内存存储的方式一一对应。例如,评分表里评分为0的条目,在算法实现的时候不需要管。矩阵(如下图,原论文中的图片)中缺失的数据,压根就没有读入条目,所以算邻居个数的时候不用剔除该用户评分为0的条目。自己刚开始理解代码的时候就整迷糊了。

日撸代码300行:第54天(基于 M-distance 的推荐)_第1张图片

第一个构造函数里,while循环之后的第一个for循环中,compressedRatingMatrix[i][0]的值是用户编号,所以tempUserTotalScore[]的下标就是用户编号,里面存的是该用户的总分。刚开始我把这个下标理解成每一条数据的序号i,后来仔细想了想,发现下标用的是compressedRatingMatrix[i][0]里存放的值,而不是i本身。它是一个值,是用户编号,比如User1或者User2等。同一用户的所有评分最终都累加到以该用户编号为下标的数组元素上,有点儿间接寻址的味道。

整个算法实现的java代码如下:

package machinelearning.knn;

/**
 * Recommendation with M-distance.
 * @author WX873
 */
import java.io.*;
import java.util.PrimitiveIterator.OfDouble;


/**
 * Item-based rating prediction with the M-distance.
 *
 * Ratings are stored as (user, item, rating) triples grouped by user. Two
 * items are treated as neighbors when their average ratings differ by less
 * than a radius, and a rating is predicted as the mean of the ratings the
 * same user gave to the neighbors of the target item.
 */
public class MBR {

	/**
	 * Default rating for 1-5 points. Used as the prediction when no neighbor
	 * is found.
	 */
	public static final double DEFAULT_RATING = 3.0;

	/**
	 * The total number of users.
	 */
	private int numUsers;

	/**
	 * The total number of items.
	 */
	private int numItems;

	/**
	 * The total number of ratings (one per line of the data file).
	 */
	private int numRatings;

	/**
	 * The predictions, one per rating triple, in the same order as
	 * compressedRatingMatrix.
	 */
	private double[] predictions;

	/**
	 * Compressed rating matrix. Each row is a {user, item, rating} triple.
	 */
	private int[][] compressedRatingMatrix;

	/**
	 * The degree of users (how many items each user has rated).
	 */
	private int[] userDegrees;

	/**
	 * The average rating of each user (NaN for a user without ratings).
	 */
	private double[] userAverageRatings;

	/**
	 * The degree of items (how many users have rated each item).
	 */
	private int[] itemDegrees;

	/**
	 * The average rating of each item (NaN for an item without ratings).
	 */
	private double[] itemAverageRatings;

	/**
	 * Row ranges per user: the triples of user u occupy rows
	 * [userStartingIndices[u], userStartingIndices[u + 1]) of
	 * compressedRatingMatrix. The array has numUsers + 1 entries.
	 */
	private int[] userStartingIndices;

	/**
	 * Number of ratings for which no neighbor was found in the last
	 * leave-one-out run.
	 */
	private int numNonNeighbors;

	/**
	 * The radius (delta) for determining the neighborhood.
	 */
	private double radius;

	/**
	 * Read the rating file and build the compressed rating matrix together
	 * with the per-user and per-item statistics.
	 *
	 * Each non-blank line must hold three comma-separated integers:
	 * user,item,rating. User and item values are used directly as array
	 * indices, so they must lie in [0, paraNumUsers) and [0, paraNumItems).
	 * Lines must be ordered by non-decreasing user index, because the
	 * prediction step scans one contiguous row range per user.
	 *
	 * @param paraFilename   The rating filename.
	 * @param paraNumUsers   Number of users
	 * @param paraNumItems   Number of items
	 * @param paraNumRatings  Number of ratings
	 * @throws FileNotFoundException    If the file does not exist.
	 * @throws IllegalArgumentException If the file content does not match the
	 *                                  declared sizes or the required order.
	 * @throws Exception                On any other read or parse failure.
	 */
	public MBR(String paraFilename, int paraNumUsers, int paraNumItems, int paraNumRatings) throws Exception {
		// Step 1. Initialize these arrays
		numItems = paraNumItems;
		numUsers = paraNumUsers;
		numRatings = paraNumRatings;

		userDegrees = new int[numUsers];
		userStartingIndices = new int[numUsers + 1];
		userAverageRatings = new double[numUsers];
		itemDegrees = new int[numItems];
		compressedRatingMatrix = new int[numRatings][3];
		itemAverageRatings = new double[numItems];

		predictions = new double[numRatings];

		// Step 2. Read the data file.
		File tempFile = new File(paraFilename);
		if (!tempFile.exists()) {
			// Report the problem to the caller instead of terminating the JVM.
			throw new FileNotFoundException("File " + paraFilename + " does not exist.");
		}//of if

		double[] tempUserTotalScore = new double[numUsers];
		double[] tempItemTotalScore = new double[numItems];
		int tempIndex = 0;
		int tempPreviousUser = -1;
		// try-with-resources closes the reader even when parsing fails.
		try (BufferedReader tempBufReader = new BufferedReader(new FileReader(tempFile))) {
			String tempString;
			while ((tempString = tempBufReader.readLine()) != null) {
				if (tempString.trim().isEmpty()) {
					continue; // Tolerate blank lines.
				}//of if
				if (tempIndex >= numRatings) {
					throw new IllegalArgumentException("File " + paraFilename
							+ " contains more than the declared " + numRatings + " ratings.");
				}//of if

				// Each line has three values
				String[] tempStrArray = tempString.split(",");
				if (tempStrArray.length < 3) {
					throw new IllegalArgumentException("Malformed rating line: " + tempString);
				}//of if
				int tempUser = Integer.parseInt(tempStrArray[0].trim());
				int tempItem = Integer.parseInt(tempStrArray[1].trim());
				int tempRating = Integer.parseInt(tempStrArray[2].trim());

				if (tempUser < 0 || tempUser >= numUsers || tempItem < 0 || tempItem >= numItems) {
					throw new IllegalArgumentException("User or item index out of range: " + tempString);
				}//of if
				if (tempUser < tempPreviousUser) {
					throw new IllegalArgumentException(
							"Ratings must be ordered by user; offending line: " + tempString);
				}//of if
				tempPreviousUser = tempUser;

				compressedRatingMatrix[tempIndex][0] = tempUser;
				compressedRatingMatrix[tempIndex][1] = tempItem;
				compressedRatingMatrix[tempIndex][2] = tempRating;

				userDegrees[tempUser]++;
				itemDegrees[tempItem]++;
				// The user/item index is the array subscript; the cell
				// accumulates the total score of that user/item.
				tempUserTotalScore[tempUser] += tempRating;
				tempItemTotalScore[tempItem] += tempRating;

				tempIndex++;
			}//of while
		}//of try

		if (tempIndex != numRatings) {
			throw new IllegalArgumentException("File " + paraFilename + " contains " + tempIndex
					+ " ratings, but " + numRatings + " were declared.");
		}//of if

		// Step 3. Starting indices are the prefix sums of the user degrees, so
		// a user without ratings gets an empty range and does not corrupt the
		// range of the preceding user.
		userStartingIndices[0] = 0;
		for (int i = 0; i < numUsers; i++) {
			userStartingIndices[i + 1] = userStartingIndices[i] + userDegrees[i];
		}//of for i

		// Step 4. Average ratings.
		for (int i = 0; i < numUsers; i++) {
			userAverageRatings[i] = average(tempUserTotalScore[i], userDegrees[i]);
		}//of for i
		for (int i = 0; i < numItems; i++) {
			itemAverageRatings[i] = average(tempItemTotalScore[i], itemDegrees[i]);
		}//of for i
	}//of the first constructor

	/**
	 * Average of a total over a count.
	 *
	 * @param paraTotal The sum of the values.
	 * @param paraCount The number of values.
	 * @return The mean, or NaN when there are no values.
	 */
	private static double average(double paraTotal, int paraCount) {
		if (paraCount == 0) {
			return Double.NaN;
		}//of if
		return paraTotal / paraCount;
	}//of average

	/**
	 * *****************************************
	 * Set the radius (delta).
	 * @param paraRadius
	 * 		The given radius. Non-positive values fall back to 0.1.
	 * *****************************************
	 */
	public void setRadius(double paraRadius) {
		if (paraRadius > 0) {
			radius = paraRadius;
		} else {
			radius = 0.1;
		}//of if
	}//of setRadius

	/**
	 * ***************************************************************************
	 * Leave-one-out prediction item based. The predicted values are stored in
	 * predictions, and numNonNeighbors counts the ratings that fell back to
	 * DEFAULT_RATING. (The misspelled method name is kept for compatibility
	 * with existing callers.)
	 *
	 * @see #computeMAE()
	 * @see #computeRSME()
	 * ***************************************************************************
	 */
	public void leaveOneOutPredictionBsaedItem() {
		System.out.println("\r\nLeaveOneOutPrediction for radius " + radius);

		numNonNeighbors = 0;
		for (int i = 0; i < numRatings; i++) {
			int tempUser = compressedRatingMatrix[i][0];
			int tempItem = compressedRatingMatrix[i][1];
			int tempRating = compressedRatingMatrix[i][2];

			int tempNeighbors = 0;
			double tempTotal = 0;

			// An item whose only rating is the one left out has no average
			// anymore, hence no neighbors.
			if (itemDegrees[tempItem] > 1) {
				// Step 1. Recompute average rating of the current item
				// without the rating being predicted.
				double tempItemAverageRating = (itemAverageRatings[tempItem] * itemDegrees[tempItem]
						- tempRating) / (itemDegrees[tempItem] - 1);

				// Step 2. Recompute neighbors, at the same time obtain the
				// ratings of neighbors. Only the rows of the current user are
				// scanned; items the user did not rate have no row at all, so
				// they never need to be filtered out here.
				for (int j = userStartingIndices[tempUser]; j < userStartingIndices[tempUser + 1]; j++) {
					int tempComparedItem = compressedRatingMatrix[j][1];
					if (tempItem == tempComparedItem) {
						continue; //Ignore itself.
					}//of if

					if (Math.abs(tempItemAverageRating - itemAverageRatings[tempComparedItem]) < radius) {
						tempTotal += compressedRatingMatrix[j][2];
						tempNeighbors++;
					}//of if
				}//of for j
			}//of if

			//Step 3. Predict as the average value of neighbors.
			if (tempNeighbors > 0) {
				predictions[i] = tempTotal / tempNeighbors;
			} else {
				predictions[i] = DEFAULT_RATING;
				numNonNeighbors++;
			}//of if
		}//of for i
	}//of leaveOneOutPredictionBsaedItem

	/****************************************************************
	 * Compute the MAE (mean absolute error) based on the deviation of each
	 * leave-one-out prediction.
	 *
	 * @return The mean of |prediction - rating| over all ratings.
	 * @throws Exception Declared for compatibility with existing callers.
	 * **************************************************************
	 */
	public double computeMAE() throws Exception {
		double tempTotalError = 0;
		for (int i = 0; i < predictions.length; i++) {
			tempTotalError += Math.abs(predictions[i] - compressedRatingMatrix[i][2]);
		}//of for i

		// MAE is the plain mean; no square root is involved.
		return tempTotalError / predictions.length;
	}//of computeMAE

	/****************************************************************
	 * Compute the RSME (root of the mean squared error) based on the
	 * deviation of each leave-one-out prediction.
	 *
	 * @return The square root of the mean of (prediction - rating)^2.
	 * @throws Exception Declared for compatibility with existing callers.
	 * **************************************************************
	 */
	public double computeRSME() throws Exception {
		double tempTotalError = 0;
		for (int i = 0; i < predictions.length; i++) {
			double tempError = predictions[i] - compressedRatingMatrix[i][2];
			tempTotalError += tempError * tempError;
		}//of for i

		double tempAverage = tempTotalError / predictions.length;
		return Math.sqrt(tempAverage);
	}//of computeRSME

	/***
	 * **************************************************
	 * The entrance of the program. Runs leave-one-out prediction for the
	 * radii 0.2, 0.3, 0.4 and 0.5 and prints the error measures.
	 * @param args Not used.
	 * **************************************************
	 */
	public static void main(String args[]) {
		try {
			MBR tempRecommender = new MBR("E:/Datasets/UCIdatasets/temp/movielens-943u1682m.txt", 943, 1682, 100000);

			// Step an integer and divide, so the radii are exactly 0.2 .. 0.5
			// instead of accumulating floating-point error by adding 0.1.
			for (int tempStep = 2; tempStep < 6; tempStep++) {
				double tempRadius = tempStep / 10.0;
				tempRecommender.setRadius(tempRadius);

				tempRecommender.leaveOneOutPredictionBsaedItem();

				double tempMAE = tempRecommender.computeMAE();
				double tempRSME = tempRecommender.computeRSME();

				System.out.println("Radius = " + tempRadius + ", MAE = " + tempMAE + ", RSME = " + tempRSME
						+ ", numNonNeighbors = " + tempRecommender.numNonNeighbors);
			}//of for tempRadius
		} catch (Exception e) {
			System.out.println(e);
		}//of try
	}//of main

}//MBR

你可能感兴趣的:(java,算法)