日撸代码300行:第55天(基于 M-distance 的推荐 (续))

代码来自闵老师“日撸 Java 三百行(51-60天)”,链接:https://blog.csdn.net/minfanphd/article/details/116975957

本文承接第54天的基于 M-distance 的推荐,目标是自己实现 user-based recommendation。原博客的提示中给出了两种方案,我选择了便于实现的那一种,但它的算法复杂度较高。另一种方案需要预先保存对同一个项目($item_i$)评过分的用户,需要添加相应的代码。实际运行下来,当前实现的时间开销确实比较大。

本人实现的代码如下:

package machinelearning.knn;

/**
 * Recommendation with M-distance.
 * @author WX873
 */
import java.io.*;
import java.util.PrimitiveIterator.OfDouble;


/**
 * User-based recommendation with M-distance, evaluated by leave-one-out.
 *
 * <p>Ratings are read from a text file of {@code user,item,rating} integer
 * triples. Two users are neighbors when the absolute difference of their
 * average ratings is smaller than the radius. The rating of a (user, item)
 * pair is predicted as the mean of the ratings that the user's neighbors gave
 * to that same item.
 */
public class MBR {

	/**
	 * Default rating for 1-5 points, used when a rating has no neighbor.
	 */
	public static final double DEFAULT_RATING = 3.0;

	/**
	 * The total number of users.
	 */
	private final int numUsers;

	/**
	 * The total number of items.
	 */
	private final int numItems;

	/**
	 * The total number of ratings (non-zero values).
	 */
	private final int numRatings;

	/**
	 * The predictions, one per row of the compressed rating matrix.
	 */
	private final double[] predictions;

	/**
	 * Compressed rating matrix. User-item-rating triples.
	 */
	private final int[][] compressedRatingMatrix;

	/**
	 * The degree of users (how many items each user has rated).
	 */
	private final int[] userDegrees;

	/**
	 * The average rating of each user.
	 */
	private final double[] userAverageRatings;

	/**
	 * The degree of items (how many users have rated each item).
	 */
	private final int[] itemDegrees;

	/**
	 * The average rating of each item.
	 */
	private final double[] itemAverageRatings;

	/**
	 * The first user starts from 0. Let the first user have x ratings, the
	 * second user will start from x. Filled while reading the file; it is not
	 * read anywhere else in this class.
	 */
	private final int[] userStartingIndices;

	/**
	 * Number of ratings that had no neighbor in the last prediction run.
	 */
	private int numNonNeighbors;

	/**
	 * The radius (delta) for determining the neighborhood.
	 */
	private double radius;

	/**
	 * Construct the rating matrix from a file and compute the per-user and
	 * per-item degrees and average ratings.
	 *
	 * <p>Each non-blank line must hold three comma-separated integers:
	 * user index, item index, rating. A user or item without any rating gets
	 * an average of {@code NaN} (0.0 divided by 0).
	 *
	 * @param paraFilename   The rating filename.
	 * @param paraNumUsers   Number of users.
	 * @param paraNumItems   Number of items.
	 * @param paraNumRatings Number of ratings.
	 * @throws FileNotFoundException    if the file does not exist.
	 * @throws IllegalArgumentException if the file has more rating lines than
	 *                                  {@code paraNumRatings}.
	 * @throws Exception                on any other read or parse failure.
	 */
	public MBR(String paraFilename, int paraNumUsers, int paraNumItems, int paraNumRatings) throws Exception {
		// Step 1. Initialize these arrays.
		numItems = paraNumItems;
		numUsers = paraNumUsers;
		numRatings = paraNumRatings;

		userDegrees = new int[numUsers];
		userStartingIndices = new int[numUsers + 1];
		userAverageRatings = new double[numUsers];
		itemDegrees = new int[numItems];
		compressedRatingMatrix = new int[numRatings][3];
		itemAverageRatings = new double[numItems];

		predictions = new double[numRatings];

		// Step 2. Read the data file.
		File tempFile = new File(paraFilename);
		if (!tempFile.exists()) {
			// Report the problem to the caller instead of terminating the JVM.
			throw new FileNotFoundException("File " + paraFilename + " does not exist.");
		} // of if

		// Indexed by user / item; each cell accumulates that user's / item's total score.
		double[] tempUserTotalScore = new double[numUsers];
		double[] tempItemTotalScore = new double[numItems];

		userStartingIndices[0] = 0;
		userStartingIndices[numUsers] = numRatings;
		int tempIndex = 0;
		// try-with-resources closes the reader even when a line fails to parse.
		try (BufferedReader tempBufReader = new BufferedReader(new FileReader(tempFile))) {
			String tempString;
			while ((tempString = tempBufReader.readLine()) != null) {
				if (tempString.trim().isEmpty()) {
					continue; // Tolerate blank lines, e.g. a trailing newline.
				} // of if
				if (tempIndex >= numRatings) {
					throw new IllegalArgumentException("File " + paraFilename + " has more than the declared "
							+ numRatings + " ratings.");
				} // of if

				// Each line has three values: user, item, rating.
				String[] tempStrArray = tempString.split(",");
				int tempUser = Integer.parseInt(tempStrArray[0].trim());
				int tempItem = Integer.parseInt(tempStrArray[1].trim());
				int tempRating = Integer.parseInt(tempStrArray[2].trim());
				compressedRatingMatrix[tempIndex][0] = tempUser;
				compressedRatingMatrix[tempIndex][1] = tempItem;
				compressedRatingMatrix[tempIndex][2] = tempRating;

				userDegrees[tempUser]++;
				itemDegrees[tempItem]++;
				tempUserTotalScore[tempUser] += tempRating;
				tempItemTotalScore[tempItem] += tempRating;

				// Starting to read the data of a new user.
				if (tempIndex > 0 && tempUser != compressedRatingMatrix[tempIndex - 1][0]) {
					userStartingIndices[tempUser] = tempIndex;
				} // of if
				tempIndex++;
			} // of while
		} // of try

		// Step 3. Average ratings of users and items.
		for (int i = 0; i < numUsers; i++) {
			userAverageRatings[i] = tempUserTotalScore[i] / userDegrees[i];
		} // of for i
		for (int i = 0; i < numItems; i++) {
			itemAverageRatings[i] = tempItemTotalScore[i] / itemDegrees[i];
		} // of for i
	}// of the first constructor

	/**
	 * Set the radius (delta).
	 *
	 * @param paraRadius The given radius. Non-positive values are replaced by 0.1.
	 */
	public void setRadius(double paraRadius) {
		if (paraRadius > 0) {
			radius = paraRadius;
		} else {
			radius = 0.1;
		} // of if
	}// of setRadius

	/**
	 * Leave-one-out prediction, user based. The predicted values are stored in
	 * {@code predictions}, and {@code numNonNeighbors} counts the ratings that
	 * fell back to {@link #DEFAULT_RATING}.
	 *
	 * <p>For every rating (u, i), the average of u is recomputed without that
	 * rating. Every other user who rated item i and whose average lies within
	 * the radius of that value is a neighbor, and the prediction is the mean of
	 * the neighbors' ratings of item i.
	 */
	public void leaveOneOutPredictionBsaedUser() {
		System.out.println("\r\nLeaveOneOutPrediction for radius " + radius);

		// Row indices of the rating matrix grouped by item, so that each
		// prediction only visits ratings of the item being predicted.
		int[][] tempItemRatingIndices = buildItemRatingIndices();

		numNonNeighbors = 0;
		for (int i = 0; i < numRatings; i++) {
			int tempUser = compressedRatingMatrix[i][0];
			int tempItem = compressedRatingMatrix[i][1];
			int tempRating = compressedRatingMatrix[i][2];

			// Step 1. Recompute average rating of the current user without this rating.
			// For a user with a single rating this is 0.0 / 0 = NaN; every
			// comparison below is then false and the default rating is used.
			double tempUserAverageRating = (userAverageRatings[tempUser] * userDegrees[tempUser] - tempRating)
					/ (userDegrees[tempUser] - 1);

			// Step 2. Find neighbors among the users who rated the same item,
			// at the same time accumulate their ratings of that item.
			int tempNeighbors = 0;
			double tempTotal = 0;
			for (int tempRow : tempItemRatingIndices[tempItem]) {
				int tempComparedUser = compressedRatingMatrix[tempRow][0];
				if (tempUser == tempComparedUser) {
					continue; // Ignore itself.
				} // of if

				if (Math.abs(tempUserAverageRating - userAverageRatings[tempComparedUser]) < radius) {
					tempTotal += compressedRatingMatrix[tempRow][2];
					tempNeighbors++;
				} // of if
			} // of for tempRow

			// Step 3. Predict as the average value of neighbors.
			if (tempNeighbors > 0) {
				predictions[i] = tempTotal / tempNeighbors;
			} else {
				predictions[i] = DEFAULT_RATING;
				numNonNeighbors++;
			} // of if
		} // of for i
	}// of leaveOneOutPredictionBsaedUser

	/**
	 * Group the row indices of the compressed rating matrix by item.
	 *
	 * @return An array whose k-th entry lists, in ascending order, the rows
	 *         that hold a rating of item k.
	 */
	private int[][] buildItemRatingIndices() {
		// Count from the matrix itself so the sizes always match its rows.
		int[] tempCounts = new int[numItems];
		for (int i = 0; i < numRatings; i++) {
			tempCounts[compressedRatingMatrix[i][1]]++;
		} // of for i

		int[][] resultIndices = new int[numItems][];
		for (int i = 0; i < numItems; i++) {
			resultIndices[i] = new int[tempCounts[i]];
		} // of for i

		int[] tempFilled = new int[numItems];
		for (int i = 0; i < numRatings; i++) {
			int tempItem = compressedRatingMatrix[i][1];
			resultIndices[tempItem][tempFilled[tempItem]++] = i;
		} // of for i
		return resultIndices;
	}// of buildItemRatingIndices

	/**
	 * Compute the MAE (mean absolute error) based on the deviation of each
	 * leave-one-out prediction.
	 *
	 * @return The mean of |prediction - actual rating| over all ratings.
	 * @throws Exception Declared for interface compatibility; not thrown here.
	 */
	public double computeMAE() throws Exception {
		double tempTotalError = 0;
		for (int i = 0; i < predictions.length; i++) {
			tempTotalError += Math.abs(predictions[i] - compressedRatingMatrix[i][2]);
		} // of for i

		// MAE is the plain mean; no square root.
		return tempTotalError / predictions.length;
	}// of computeMAE

	/**
	 * Compute the RSME (root of the mean squared error) based on the deviation
	 * of each leave-one-out prediction.
	 *
	 * @return The square root of the mean of (prediction - actual rating)^2.
	 * @throws Exception Declared for interface compatibility; not thrown here.
	 */
	public double computeRSME() throws Exception {
		double tempTotalError = 0;
		for (int i = 0; i < predictions.length; i++) {
			double tempDifference = predictions[i] - compressedRatingMatrix[i][2];
			tempTotalError += tempDifference * tempDifference;
		} // of for i

		double tempAverage = tempTotalError / predictions.length;
		return Math.sqrt(tempAverage);
	}// of computeRSME

	/**
	 * The entrance of the program. Runs the leave-one-out evaluation for
	 * several radii and prints MAE, RSME and the number of ratings without
	 * neighbors.
	 *
	 * @param args Optional; {@code args[0]} overrides the default rating file.
	 */
	public static void main(String args[]) {
		String tempFilename = "E:/Datasets/UCIdatasets/temp/movielens-943u1682m.txt";
		if (args != null && args.length > 0) {
			tempFilename = args[0];
		} // of if

		try {
			MBR tempRecommender = new MBR(tempFilename, 943, 1682, 100000);

			for (double tempRadius = 0.2; tempRadius < 0.6; tempRadius += 0.1) {
				tempRecommender.setRadius(tempRadius);

				tempRecommender.leaveOneOutPredictionBsaedUser();
				double tempMAE = tempRecommender.computeMAE();
				double tempRSME = tempRecommender.computeRSME();

				System.out.println("Radius = " + tempRadius + ", MAE = " + tempMAE + ", RSME = " + tempRSME
						+ ", numNonNeighbors = " + tempRecommender.numNonNeighbors);
			} // of for tempRadius
		} catch (Exception e) {
			System.out.println(e);
		} // of try
	}// of main

}// MBR

你可能感兴趣的:(java,算法)