日撸代码300行学习笔记 Day 54-55

1.基于 M-distance 的推荐

这是老师和学姐曾经发表过的一篇论文,主要是讲解关于评分系统的一个算法。

这篇论文采用留一法(leave-one-out)进行评估:依次遍历每一条评分,每次把它单独作为测试样本,其余评分全部作为训练集来预测它;最后把所有预测误差取平均(例如 MAE、RMSE),得到算法的整体误差。这种方法很公平,因为每一条评分都当过一次测试样本;但由于数据量庞大,如果算法效率不够高,花费的时间会非常长。

package machine;

import java.io.*;

public class MBR {

	/**
	 * Default rating on the 1-5 scale. Used as the prediction when no neighbor
	 * can be found for the rating being predicted.
	 */
	public static final double DEFAULT_RATING = 3.0;

	/**
	 * The total number of users.
	 */
	private int numUsers;

	/**
	 * The total number of items.
	 */
	private int numItems;

	/**
	 * The total number of ratings (non-zero values).
	 */
	private int numRatings;

	/**
	 * The predictions. Has the same length as numRatings; entry i is the
	 * prediction for the i-th row of compressedRatingMatrix.
	 */
	private double[] predictions;

	/**
	 * Compressed rating matrix. Each row is a (user, item, rating) triple.
	 */
	private int[][] compressedRatingMatrix;

	/**
	 * The degree of users (how many items each user has rated).
	 */
	private int[] userDegrees;

	/**
	 * The average rating of each user. NaN for a user without any rating.
	 */
	private double[] userAverageRatings;

	/**
	 * The degree of items (how many users have rated each item).
	 */
	private int[] itemDegrees;

	/**
	 * The average rating of each item. NaN for an item without any rating.
	 */
	private double[] itemAverageRatings;

	/**
	 * The first user starts from 0. Let the first user have x ratings, the
	 * second user will start from x. The ratings of user u occupy the rows
	 * [userStartingIndices[u], userStartingIndices[u + 1]) of
	 * compressedRatingMatrix, so they can be addressed directly.
	 */
	private int[] userStartingIndices;

	/**
	 * Number of ratings for which no neighbor was found in the last
	 * leave-one-out run.
	 */
	private int numNonNeighbors;

	/**
	 * The radius (delta) for determining the neighborhood, i.e. the maximal
	 * allowed gap between two item average ratings.
	 */
	private double radius;

	/**
	 ************************* 
	 * Construct the rating matrix from a text file. Each non-blank line holds
	 * one "user,item,rating" triple of integers, and the lines must be grouped
	 * by user in non-decreasing user order.
	 * 
	 * @param paraFilename   the rating filename.
	 * @param paraNumUsers   number of users
	 * @param paraNumItems   number of items
	 * @param paraNumRatings number of ratings
	 * @throws FileNotFoundException    if the rating file does not exist.
	 * @throws IllegalArgumentException if a line is malformed, an index is out
	 *                                  of range, the users are not sorted, or
	 *                                  the number of ratings in the file
	 *                                  differs from paraNumRatings.
	 * @throws IOException              if reading the file fails.
	 ************************* 
	 */
	public MBR(String paraFilename, int paraNumUsers, int paraNumItems, int paraNumRatings) throws Exception {
		// Step 1. Initialize these arrays
		numItems = paraNumItems;
		numUsers = paraNumUsers;
		numRatings = paraNumRatings;

		userDegrees = new int[numUsers];
		userStartingIndices = new int[numUsers + 1];
		userAverageRatings = new double[numUsers];
		itemDegrees = new int[numItems];
		// One row per rating, three columns: user, item, rating.
		compressedRatingMatrix = new int[numRatings][3];
		itemAverageRatings = new double[numItems];

		predictions = new double[numRatings];

		System.out.println("Reading " + paraFilename);

		// Step 2. Read the data file.
		File tempFile = new File(paraFilename);
		if (!tempFile.exists()) {
			// Report the problem to the caller instead of killing the JVM
			// with a "success" exit status.
			throw new FileNotFoundException("File " + paraFilename + " does not exist.");
		} // Of if

		int tempIndex = 0;
		int tempLineNumber = 0;
		int tempPreviousUser = 0;
		// try-with-resources closes the reader even when parsing fails.
		try (BufferedReader tempBufReader = new BufferedReader(new FileReader(tempFile))) {
			String tempString;
			// Read line by line.
			while ((tempString = tempBufReader.readLine()) != null) {
				tempLineNumber++;
				if (tempString.trim().isEmpty()) {
					continue;// Tolerate blank lines, e.g., at the end of the file.
				} // Of if

				if (tempIndex >= numRatings) {
					throw new IllegalArgumentException(
							paraFilename + " contains more than the declared " + numRatings + " ratings.");
				} // Of if

				// Each line has three values
				String[] tempStrArray = tempString.split(",");
				if (tempStrArray.length < 3) {
					throw new IllegalArgumentException(
							"Line " + tempLineNumber + " of " + paraFilename + " is not a user,item,rating triple.");
				} // Of if
				int tempUser = Integer.parseInt(tempStrArray[0].trim());
				int tempItem = Integer.parseInt(tempStrArray[1].trim());
				int tempRating = Integer.parseInt(tempStrArray[2].trim());

				if (tempUser < 0 || tempUser >= numUsers || tempItem < 0 || tempItem >= numItems) {
					throw new IllegalArgumentException(
							"Line " + tempLineNumber + " of " + paraFilename + " has an index out of range.");
				} // Of if
				// The compressed layout stores the ratings of one user
				// contiguously, so the file must be sorted by user.
				if (tempUser < tempPreviousUser) {
					throw new IllegalArgumentException(
							"Line " + tempLineNumber + " of " + paraFilename + " breaks the user ordering.");
				} // Of if
				tempPreviousUser = tempUser;

				compressedRatingMatrix[tempIndex][0] = tempUser;
				compressedRatingMatrix[tempIndex][1] = tempItem;
				compressedRatingMatrix[tempIndex][2] = tempRating;

				// One more rating for this user and for this item.
				userDegrees[tempUser]++;
				itemDegrees[tempItem]++;

				tempIndex++;
			} // Of while
		} // Of try

		if (tempIndex != numRatings) {
			throw new IllegalArgumentException(
					paraFilename + " contains " + tempIndex + " ratings, but " + numRatings + " were declared.");
		} // Of if

		// Step 3. Starting indices are the prefix sums of the user degrees.
		// This also gives a user without ratings a correct (empty) range,
		// so the range of the preceding user is not truncated.
		userStartingIndices[0] = 0;
		for (int i = 0; i < numUsers; i++) {
			userStartingIndices[i + 1] = userStartingIndices[i] + userDegrees[i];
		} // Of for i

		// Step 4. Compute the average ratings of users and items.
		double[] tempUserTotalScore = new double[numUsers];
		double[] tempItemTotalScore = new double[numItems];
		for (int i = 0; i < numRatings; i++) {
			tempUserTotalScore[compressedRatingMatrix[i][0]] += compressedRatingMatrix[i][2];
			tempItemTotalScore[compressedRatingMatrix[i][1]] += compressedRatingMatrix[i][2];
		} // Of for i

		// A degree of 0 yields 0.0 / 0 = NaN, which marks "no average".
		for (int i = 0; i < numUsers; i++) {
			userAverageRatings[i] = tempUserTotalScore[i] / userDegrees[i];
		} // Of for i
		for (int i = 0; i < numItems; i++) {
			itemAverageRatings[i] = tempItemTotalScore[i] / itemDegrees[i];
		} // Of for i
	}// Of the first constructor

	/**
	 ************************* 
	 * Set the radius (delta).
	 * 
	 * @param paraRadius The given radius. A non-positive value is replaced by
	 *                   0.1.
	 ************************* 
	 */
	public void setRadius(double paraRadius) {
		if (paraRadius > 0) {
			radius = paraRadius;
		} else {
			radius = 0.1;
		} // Of if
	}// Of setRadius

	/**
	 ************************* 
	 * Leave-one-out prediction. Each rating is hidden in turn and predicted as
	 * the mean of the same user's ratings on neighboring items, i.e., items
	 * whose average rating differs from the hidden item's average by less than
	 * the radius. The predicted values are stored in predictions, and
	 * numNonNeighbors counts the ratings that fell back to DEFAULT_RATING.
	 * 
	 * @see #predictions
	 ************************* 
	 */
	public void leaveOneOutPrediction() {
		double tempItemAverageRating;
		// Make each line of the code shorter.
		int tempUser, tempItem, tempRating;
		System.out.println("\r\nLeaveOneOutPrediction for radius " + radius);

		numNonNeighbors = 0;
		// One round per rating: hide it, then predict it.
		for (int i = 0; i < numRatings; i++) {
			tempUser = compressedRatingMatrix[i][0];
			tempItem = compressedRatingMatrix[i][1];
			tempRating = compressedRatingMatrix[i][2];

			// Step 1. Recompute average rating of the current item without
			// the hidden rating.
			int tempRemainingDegree = itemDegrees[tempItem] - 1;
			if (tempRemainingDegree == 0) {
				// The hidden rating was the only one of this item, so the
				// item has no average left and hence no neighbor.
				predictions[i] = DEFAULT_RATING;
				numNonNeighbors++;
				continue;
			} // Of if
			// Total score of the item minus the hidden rating.
			tempItemAverageRating = (itemAverageRatings[tempItem] * itemDegrees[tempItem] - tempRating)
					/ tempRemainingDegree;

			// Step 2. Recompute neighbors, at the same time obtain the ratings
			// Of neighbors.
			int tempNeighbors = 0;
			double tempTotal = 0;
			int tempComparedItem;
			// Scan the rows of the current user only.
			for (int j = userStartingIndices[tempUser]; j < userStartingIndices[tempUser + 1]; j++) {
				tempComparedItem = compressedRatingMatrix[j][1];
				if (tempItem == tempComparedItem) {
					continue;// Ignore itself.
				} // Of if

				// A neighbor is an item whose average rating is within the
				// radius of the current item's average.
				if (Math.abs(tempItemAverageRating - itemAverageRatings[tempComparedItem]) < radius) {
					tempTotal += compressedRatingMatrix[j][2];
					tempNeighbors++;
				} // Of if
			} // Of for j

			// Step 3. Predict as the average value of neighbors.
			if (tempNeighbors > 0) {
				predictions[i] = tempTotal / tempNeighbors;
			} else {
				// No neighbor: fall back to the default and count the case.
				predictions[i] = DEFAULT_RATING;
				numNonNeighbors++;
			} // Of if
		} // Of for i
	}// Of leaveOneOutPrediction

	/**
	 ************************* 
	 * Compute the MAE (mean absolute error) based on the deviation of each
	 * leave-one-out prediction from the actual rating.
	 * 
	 * @return The mean of |prediction - rating| over all ratings.
	 * @author Fan Min
	 ************************* 
	 */
	public double computeMAE() throws Exception {
		double tempTotalError = 0;
		for (int i = 0; i < predictions.length; i++) {
			tempTotalError += Math.abs(predictions[i] - compressedRatingMatrix[i][2]);
		} // Of for i

		return tempTotalError / predictions.length;
	}// Of computeMAE

	/**
	 ************************* 
	 * Compute the RMSE (root mean square error) based on the deviation of each
	 * leave-one-out prediction from the actual rating. The method name keeps
	 * its original spelling for compatibility with existing callers.
	 * 
	 * @return The square root of the mean of (prediction - rating)^2.
	 * @author Fan Min
	 ************************* 
	 */
	public double computeRSME() throws Exception {
		double tempTotalError = 0;
		for (int i = 0; i < predictions.length; i++) {
			// Squared difference between the prediction and the actual rating.
			double tempDifference = predictions[i] - compressedRatingMatrix[i][2];
			tempTotalError += tempDifference * tempDifference;
		} // Of for i

		double tempAverage = tempTotalError / predictions.length;

		return Math.sqrt(tempAverage);
	}// Of computeRSME

	/**
	 ************************* 
	 * The entrance of the program. Runs leave-one-out prediction for the radii
	 * 0.2, 0.3, 0.4 and 0.5 and prints the MAE, the RMSE and the number of
	 * ratings without neighbors for each.
	 * 
	 * @param args Optional. args[0], when given, replaces the default path of
	 *             the rating file.
	 ************************* 
	 */
	public static void main(String[] args) {
		String tempFilename = "O:/javasampledata-master/movielens-943u1682m.txt";
		if (args != null && args.length > 0) {
			tempFilename = args[0];
		} // Of if

		try {
			// 943 users, 1682 items, 100000 ratings.
			MBR tempRecommender = new MBR(tempFilename, 943, 1682, 100000);

			// Step through integers to avoid accumulating floating-point
			// error in the radius (0.2 + 0.1 is not exactly 0.3).
			for (int tempStep = 2; tempStep < 6; tempStep++) {
				double tempRadius = tempStep / 10.0;
				tempRecommender.setRadius(tempRadius);

				tempRecommender.leaveOneOutPrediction();
				// MAE: mean absolute error, e.g., predicting 3.5 for an actual
				// 4 contributes 0.5.
				double tempMAE = tempRecommender.computeMAE();
				// RMSE: squares the errors before averaging and takes the
				// square root at the end.
				double tempRSME = tempRecommender.computeRSME();

				System.out.println("Radius = " + tempRadius + ", MAE = " + tempMAE + ", RSME = " + tempRSME
						+ ", numNonNeighbors = " + tempRecommender.numNonNeighbors);
			} // Of for tempRadius
		} catch (Exception ee) {
			System.out.println(ee);
		} // Of try
	}// Of main
}// Of class MBR

日撸代码300行学习笔记 Day 54-55_第1张图片

具体的代码步骤以及理解都写在上面代码的注释里了,这里就不再单独展开。这种算法与 kNN 相比有明显差别:M-distance 使用一个阈值 delta(即半径),相当于划定一个范围,凡是平均分之差落在这个范围内的项目都是自己的邻居,邻居个数不固定;而 kNN 则是固定选取最近的 k 个邻居。

你可能感兴趣的:(学习,java)