代码来自闵老师“日撸 Java 三百行(51-60天)”,链接:日撸 Java 三百行(51-60天,kNN 与 NB)_闵帆的博客-CSDN博客
算法是基于M-distance的推荐,通过用户评分矩阵对用户进行电影推荐。论文为Mei Zheng, Fan Min, Heng-Ru Zhang, Wen-Bin Chen, Fast recommendations with the M-distance, IEEE Access 4 (2016) 1464–1468。论文可点击进行下载。数据集也可以通过闵老师原文的链接进行下载。数据集的名称为movielens-943u1682m.txt.
代码在基于条目推荐的时候使用了一个技巧,降低了算法的复杂度。因为数据是按照用户存储的,所以对于同一用户,其数据在数组中的存储范围就是从userStartingIndices[tempUser]开始,至userStartingIndices[tempUser + 1](不含)结束。这样在推荐计算的时候,就不需要遍历整个数据集,只需遍历该用户对应的数据条目。
通过今天的代码又学到了一点儿,算法的实际思路不一定与底层内存存储的方式一一对应。例如,评分表里评分为0的条目,在算法实现的时候不需要管。矩阵(如下图,原论文中的图片)中缺失的数据,压根就没有读入条目,所以算邻居个数的时候不用剔除该用户评分为0的条目。自己刚开始理解代码的时候就整迷糊了。
第一个构造函数里,while循环之后的第一个for循环中,compressedRatingMatrix[i][0]的值是用户编号,所以tempUserTotalScore[]的下标就是用户编号,里面存的是该用户的评分总和。刚开始我把它理解成每一条数据的index,后来仔细想了想,发现compressedRatingMatrix[i][0]存的并不是下标i,而是一个值,即用户编号,比如0号用户或者1号用户等。数组最终是按这个用户编号来定位元素的,有点儿寻址的味道。
整个算法实现的java代码如下:
package machinelearning.knn;
/**
* Recommendation with M-distance.
* @author WX873
*/
import java.io.*;
import java.util.PrimitiveIterator.OfDouble;
/**
 * Item-based recommendation with the M-distance.
 *
 * <p>Ratings are held as user-item-rating triples. Two items are treated as
 * neighbors when their average ratings differ by less than {@code radius}.
 * A rating is predicted, leave-one-out, as the mean of the ratings that the
 * same user gave to the neighbors of the target item.
 *
 * <p>User and item identifiers are used directly as array indices, so they
 * must lie in {@code [0, numUsers)} and {@code [0, numItems)} respectively.
 * The rows of the data file must be grouped by user in ascending user order;
 * this ordering is relied upon but not checked.
 *
 * @author WX873
 */
public class MBR {
    /**
     * Default rating for 1-5 points; used when the target item has no neighbor.
     */
    public static final double DEFAULT_RATING = 3.0;

    /**
     * The total number of users.
     */
    private int numUsers;

    /**
     * The total number of items.
     */
    private int numItems;

    /**
     * The total number of ratings (rows of the data file).
     */
    private int numRatings;

    /**
     * The predictions; entry i is the prediction for row i of the rating matrix.
     */
    private double[] predictions;

    /**
     * Compressed rating matrix. Each row is a {user, item, rating} triple.
     */
    private int[][] compressedRatingMatrix;

    /**
     * The degree of users (how many items each user has rated).
     */
    private int[] userDegrees;

    /**
     * The average rating of each user; NaN for a user without ratings.
     */
    private double[] userAverageRatings;

    /**
     * The degree of items (how many users have rated each item).
     */
    private int[] itemDegrees;

    /**
     * The average rating of each item; NaN for an item without ratings.
     */
    private double[] itemAverageRatings;

    /**
     * Entry u is the index of the first row of user u in the compressed
     * rating matrix; entry numUsers equals numRatings. The rows of user u are
     * therefore [userStartingIndices[u], userStartingIndices[u + 1]).
     */
    private int[] userStartingIndices;

    /**
     * Number of ratings that fell back to the default in the latest
     * leave-one-out run because no neighbor was found.
     */
    private int numNonNeighbors;

    /**
     * The radius (delta) for determining the neighborhood. Stays 0 until
     * {@link #setRadius(double)} is called, in which case nothing is a neighbor.
     */
    private double radius;

    /**
     * Construct the rating matrix from a comma-separated file whose lines are
     * {@code user,item,rating} with integer fields. Blank lines are skipped.
     *
     * @param paraFilename   The rating filename.
     * @param paraNumUsers   Number of users.
     * @param paraNumItems   Number of items.
     * @param paraNumRatings Number of ratings; must equal the number of data rows in the file.
     * @throws FileNotFoundException    if the file does not exist.
     * @throws IllegalArgumentException if the file does not hold exactly
     *                                  {@code paraNumRatings} data rows.
     * @throws Exception                on any other read or parse failure.
     */
    public MBR(String paraFilename, int paraNumUsers, int paraNumItems, int paraNumRatings) throws Exception {
        // Step 1. Initialize these arrays.
        numItems = paraNumItems;
        numUsers = paraNumUsers;
        numRatings = paraNumRatings;
        userDegrees = new int[numUsers];
        userStartingIndices = new int[numUsers + 1];
        userAverageRatings = new double[numUsers];
        itemDegrees = new int[numItems];
        compressedRatingMatrix = new int[numRatings][3];
        itemAverageRatings = new double[numItems];
        predictions = new double[numRatings];

        // Step 2. Read the data file.
        File tempFile = new File(paraFilename);
        if (!tempFile.exists()) {
            // Report to the caller instead of terminating the whole JVM.
            throw new FileNotFoundException("File " + paraFilename + " does not exists.");
        }

        int tempIndex = 0;
        // try-with-resources closes the reader even when a line fails to parse.
        try (BufferedReader tempBufReader = new BufferedReader(new FileReader(tempFile))) {
            String tempString;
            while ((tempString = tempBufReader.readLine()) != null) {
                if (tempString.trim().isEmpty()) {
                    continue; // Tolerate blank (e.g. trailing) lines.
                }
                if (tempIndex >= numRatings) {
                    throw new IllegalArgumentException("File " + paraFilename
                            + " holds more than the declared " + numRatings + " ratings.");
                }

                // Each line has three values: user, item, rating.
                String[] tempStrArray = tempString.split(",");
                int[] tempTriple = compressedRatingMatrix[tempIndex];
                tempTriple[0] = Integer.parseInt(tempStrArray[0].trim());
                tempTriple[1] = Integer.parseInt(tempStrArray[1].trim());
                tempTriple[2] = Integer.parseInt(tempStrArray[2].trim());

                userDegrees[tempTriple[0]]++;
                itemDegrees[tempTriple[1]]++;
                tempIndex++;
            }
        }
        if (tempIndex != numRatings) {
            // Unfilled rows would otherwise be scored as ratings of 0 by user 0 on item 0.
            throw new IllegalArgumentException("File " + paraFilename + " holds " + tempIndex
                    + " ratings but " + numRatings + " were declared.");
        }

        // Step 3. Starting indices as prefix sums of the user degrees. Unlike
        // recording the index where a new user id first appears, this also gives
        // a user without any rating a correct (empty) range.
        userStartingIndices[0] = 0;
        for (int i = 0; i < numUsers; i++) {
            userStartingIndices[i + 1] = userStartingIndices[i] + userDegrees[i];
        }

        // Step 4. Average ratings. The user/item id stored in the triple is the
        // index into the total arrays, not the row number i.
        double[] tempUserTotalScore = new double[numUsers];
        double[] tempItemTotalScore = new double[numItems];
        for (int i = 0; i < numRatings; i++) {
            tempUserTotalScore[compressedRatingMatrix[i][0]] += compressedRatingMatrix[i][2];
            tempItemTotalScore[compressedRatingMatrix[i][1]] += compressedRatingMatrix[i][2];
        }
        for (int i = 0; i < numUsers; i++) {
            userAverageRatings[i] = tempUserTotalScore[i] / userDegrees[i];
        }
        for (int i = 0; i < numItems; i++) {
            itemAverageRatings[i] = tempItemTotalScore[i] / itemDegrees[i];
        }
    }

    /**
     * Set the radius (delta).
     *
     * @param paraRadius The given radius; any value that is not positive is replaced by 0.1.
     */
    public void setRadius(double paraRadius) {
        if (paraRadius > 0) {
            radius = paraRadius;
        } else {
            radius = 0.1;
        }
    }

    /**
     * Leave-one-out prediction, item based. The predicted values are stored in
     * {@code predictions}, and {@code numNonNeighbors} counts the ratings that
     * received {@link #DEFAULT_RATING} because no neighbor was found.
     */
    public void leaveOneOutPredictionBsaedItem() {
        System.out.println("\r\nLeaveOneOutPrediction for radius " + radius);
        numNonNeighbors = 0;

        for (int i = 0; i < numRatings; i++) {
            int tempUser = compressedRatingMatrix[i][0];
            int tempItem = compressedRatingMatrix[i][1];
            int tempRating = compressedRatingMatrix[i][2];

            // An item rated only once has no average left once this rating is
            // removed (the formula below would yield 0/0), so it has no neighbor.
            if (itemDegrees[tempItem] <= 1) {
                predictions[i] = DEFAULT_RATING;
                numNonNeighbors++;
                continue;
            }

            // Step 1. Recompute the average rating of the current item without this rating.
            double tempItemAverageRating = (itemAverageRatings[tempItem] * itemDegrees[tempItem] - tempRating)
                    / (itemDegrees[tempItem] - 1);

            // Step 2. Find the neighbors among the items rated by the same user and
            // accumulate their ratings. Only this user's rows are scanned. Unrated
            // items were never read in, so there are no zero ratings to exclude.
            int tempNeighbors = 0;
            double tempTotal = 0;
            for (int j = userStartingIndices[tempUser]; j < userStartingIndices[tempUser + 1]; j++) {
                int tempComparedItem = compressedRatingMatrix[j][1];
                if (tempItem == tempComparedItem) {
                    continue; // Ignore itself.
                }
                if (Math.abs(tempItemAverageRating - itemAverageRatings[tempComparedItem]) < radius) {
                    tempTotal += compressedRatingMatrix[j][2];
                    tempNeighbors++;
                }
            }

            // Step 3. Predict as the average rating of the neighbors.
            if (tempNeighbors > 0) {
                predictions[i] = tempTotal / tempNeighbors;
            } else {
                predictions[i] = DEFAULT_RATING;
                numNonNeighbors++;
            }
        }
    }

    /**
     * Compute the MAE (mean absolute error) based on the deviation of each
     * leave-one-out prediction.
     *
     * @return The mean of |prediction - rating| over all ratings; NaN when there are none.
     * @throws Exception Never thrown here; declared for compatibility with existing callers.
     */
    public double computeMAE() throws Exception {
        double tempTotalError = 0;
        for (int i = 0; i < predictions.length; i++) {
            tempTotalError += Math.abs(predictions[i] - compressedRatingMatrix[i][2]);
        }
        // The MAE is the plain mean; no square root is involved.
        return tempTotalError / predictions.length;
    }

    /**
     * Compute the RMSE (root mean squared error) based on the deviation of
     * each leave-one-out prediction.
     *
     * @return The square root of the mean of (prediction - rating)^2; NaN when there are no ratings.
     * @throws Exception Never thrown here; declared for compatibility with existing callers.
     */
    public double computeRSME() throws Exception {
        double tempTotalError = 0;
        for (int i = 0; i < predictions.length; i++) {
            double tempDifference = predictions[i] - compressedRatingMatrix[i][2];
            tempTotalError += tempDifference * tempDifference;
        }
        return Math.sqrt(tempTotalError / predictions.length);
    }

    /**
     * The entrance of the program. Runs leave-one-out prediction on the
     * MovieLens 943-user / 1682-item / 100000-rating file for radii 0.2 to 0.5.
     *
     * @param args Optional; args[0] overrides the default location of the data file.
     */
    public static void main(String args[]) {
        String tempFilename = "E:/Datasets/UCIdatasets/temp/movielens-943u1682m.txt";
        if (args != null && args.length > 0) {
            tempFilename = args[0];
        }

        try {
            MBR tempRecommender = new MBR(tempFilename, 943, 1682, 100000);
            // An integer counter avoids accumulated floating-point error in the radius.
            for (int tempStep = 2; tempStep < 6; tempStep++) {
                double tempRadius = tempStep / 10.0;
                tempRecommender.setRadius(tempRadius);
                tempRecommender.leaveOneOutPredictionBsaedItem();
                double tempMAE = tempRecommender.computeMAE();
                double tempRSME = tempRecommender.computeRSME();
                System.out.println("Radius = " + tempRadius + ", MAE = " + tempMAE + ", RSME = " + tempRSME
                        + ", numNonNeighbors = " + tempRecommender.numNonNeighbors);
            }
        } catch (Exception e) {
            System.out.println(e);
        }
    }
}