Introduction to Recommender Systems-Programming Assignment 1

Overview

This assignment will explore non-personalized recommendations. You will be given a program stub and a data set in .csv format. You will use these to write a program that makes basic, non-personalized recommendations.

Download Ratings Data — This is a comma-separated values file, with the user number, movie ID, and rating, in that order.

Download Movies List — Decoding the movie title is not required for this assignment, but are you curious which movie is which in the ratings file? Use this file to find out!

Download Users List — Decoding the unique user id is not required for this assignment, but use this file to find your user number using the unique identifier that was provided to you after rating the movies.

Download Java Stub (be sure you can successfully compile and run before making changes)

Notes

This assignment requires you to write the code needed to parse the ratings file. It is up to you how you do this (including whether you skip ahead and use LensKit data structures or simply build your own matrix). It is particularly important to make sure you can distinguish between rated and non-rated cells in your matrix.

Deliverables

There are 2 deliverables for this assignment. Each deliverable represents a different analysis of the data provided to you. For each deliverable, you will submit a list of the top 5 movies that occur with movies A, B, and C; where A, B, and C will be uniquely assigned to you. Do this for each of the two association formulas described in class (remember, your movie is x, and you are looking for the other movies y that maximize the formula values):

  1. Simple: (x and y) / x
  2. Advanced: ((x and y) / x) / ((!x and y) / !x)

Output Format

For each formula, your output should be a CSV file (a file of comma-separated values) defined as follows:

Each file will have three rows (one for each movie you're computing associations for). Each row will have the movie ID of the movie assigned to you, followed by five pairs of "movie ID,predicted-score", from first to last, showing the top-five associated movies using that formula.

Note: You will be graded on both choosing the right movies and getting correct scores (rounded to the hundredths place), therefore you should provide at least two decimal places precision on your predicted scores.



第一周的作业较为简单,给一些用户信息,电影信息,评分信息,要按照上面两个式子计算出与某个指定电影相关度最高的五部电影。处理时需要统计出相同用户的评价个数,为了便于查找,采用嵌套的map存储信息较好。

第一问代码:

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;


/**
 * Non-personalized recommender using the "simple" association formula
 * {@code (x and y) / x}: for a target movie x, every other movie y is scored by
 * the fraction of x's raters who also rated y.
 *
 * <p>Reads {@code ratings.csv} (rows of {@code user,movie,rating}) from the working
 * directory and prints one CSV row to standard output: the target movie ID followed
 * by up to five {@code movieID,score} pairs, best score first.
 *
 * <p>Names outside the file's import list ({@code Set}, {@code HashSet},
 * {@code Collections}, {@code IOException}) are written fully qualified so the class
 * compiles against the existing imports.
 */
public class Rsp1 {

    /** Ratings file, resolved against the working directory. */
    private static final String RATINGS_FILE = "ratings.csv";

    /** Maximum number of associated movies printed. */
    private static final int TOP_N = 5;

    /**
     * Computes and prints the top associations for the hard-coded target movie.
     *
     * @param args unused
     * @throws IllegalArgumentException if the target movie has no ratings or a row is malformed
     * @throws Exception if the ratings file cannot be read
     */
    public static void main(String[] args) throws Exception {
        //change the movieID here
        String movieid = "453";

        Map<String, java.util.Set<Integer>> ratersByMovie = loadRaters(RATINGS_FILE);
        java.util.Set<Integer> targetRaters = ratersByMovie.get(movieid);
        if (targetRaters == null) {
            throw new IllegalArgumentException(
                    "Movie " + movieid + " has no ratings in " + RATINGS_FILE);
        }

        ArrayList<Scored> ranked = new ArrayList<Scored>();
        for (Map.Entry<String, java.util.Set<Integer>> entry : ratersByMovie.entrySet()) {
            // Exclude the target explicitly: it always scores 1.0 against itself and
            // would otherwise compete (and tie) with genuine recommendations.
            if (entry.getKey().equals(movieid)) {
                continue;
            }
            int both = countShared(targetRaters, entry.getValue());
            // targetRaters is never empty here: a movie only enters the map with a rater.
            ranked.add(new Scored(entry.getKey(), (double) both / targetRaters.size()));
        }
        java.util.Collections.sort(ranked);

        StringBuilder row = new StringBuilder(movieid);
        int shown = Math.min(TOP_N, ranked.size());
        for (int i = 0; i < shown; i++) {
            Scored hit = ranked.get(i);
            row.append(',').append(hit.movieId).append(',').append(hit.score);
        }
        System.out.print(row.toString());
    }

    /**
     * Parses the ratings file into a map from movie ID to the set of users who rated it.
     * Blank lines are skipped; duplicate (user, movie) rows collapse into one entry.
     */
    private static Map<String, java.util.Set<Integer>> loadRaters(String path)
            throws java.io.IOException {
        Map<String, java.util.Set<Integer>> ratersByMovie =
                new HashMap<String, java.util.Set<Integer>>();
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().length() == 0) {
                    continue;
                }
                String[] fields = line.split(",", -1);
                if (fields.length < 2) {
                    throw new IllegalArgumentException("Malformed ratings row: " + line);
                }
                Integer user = Integer.valueOf(fields[0].trim());
                String movie = fields[1].trim();
                java.util.Set<Integer> raters = ratersByMovie.get(movie);
                if (raters == null) {
                    raters = new java.util.HashSet<Integer>();
                    ratersByMovie.put(movie, raters);
                }
                raters.add(user);
            }
        } finally {
            reader.close();
        }
        return ratersByMovie;
    }

    /** Returns how many users appear in both sets, probing the larger set with the smaller. */
    private static int countShared(java.util.Set<Integer> first, java.util.Set<Integer> second) {
        java.util.Set<Integer> smaller = first.size() <= second.size() ? first : second;
        java.util.Set<Integer> larger = smaller == first ? second : first;
        int shared = 0;
        for (Integer user : smaller) {
            if (larger.contains(user)) {
                shared++;
            }
        }
        return shared;
    }

    /** A candidate movie with its score; orders by score descending, then by movie ID. */
    private static final class Scored implements Comparable<Scored> {
        final String movieId;
        final double score;

        Scored(String movieId, double score) {
            this.movieId = movieId;
            this.score = score;
        }

        @Override
        public int compareTo(Scored other) {
            int byScore = Double.compare(other.score, score);
            // The ID tie-break (plain string order) keeps output independent of HashMap order.
            return byScore != 0 ? byScore : movieId.compareTo(other.movieId);
        }
    }
}

第二问代码:

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;


/**
 * Non-personalized recommender using the "advanced" association formula
 * {@code ((x and y) / x) / ((!x and y) / !x)}: for a target movie x, every other
 * movie y is scored by how much more often x's raters rated y than non-raters of x did.
 *
 * <p>Reads {@code ratings.csv} (rows of {@code user,movie,rating}) and {@code users.csv}
 * (user number in the first column) from the working directory. The number of users who
 * did not rate x is taken as the distinct user count in {@code users.csv} minus x's
 * raters, so {@code users.csv} is assumed to list every user that appears in the ratings.
 * Prints one CSV row to standard output: the target movie ID followed by up to five
 * {@code movieID,score} pairs, best score first.
 *
 * <p>A movie rated only by raters of x has a zero denominator and scores
 * {@code Infinity}; such movies rank first.
 *
 * <p>Names outside the file's import list ({@code Set}, {@code HashSet},
 * {@code Collections}, {@code IOException}) are written fully qualified so the class
 * compiles against the existing imports.
 */
public class Rsp2 {

    /** Ratings file, resolved against the working directory. */
    private static final String RATINGS_FILE = "ratings.csv";

    /** User list file, resolved against the working directory. */
    private static final String USERS_FILE = "users.csv";

    /** Maximum number of associated movies printed. */
    private static final int TOP_N = 5;

    /**
     * Computes and prints the top associations for the hard-coded target movie.
     *
     * @param args unused
     * @throws IllegalArgumentException if the target movie has no ratings or a row is malformed
     * @throws IllegalStateException if no listed user is a non-rater of the target movie,
     *         which leaves the formula undefined
     * @throws Exception if an input file cannot be read
     */
    public static void main(String[] args) throws Exception {
        //get the movie and its rated user
        Map<String, java.util.Set<Integer>> ratersByMovie = loadRaters(RATINGS_FILE);
        //get user list
        java.util.Set<Integer> allUsers = loadUsers(USERS_FILE);

        //change the movieID here
        String movieid = "453";

        java.util.Set<Integer> targetRaters = ratersByMovie.get(movieid);
        if (targetRaters == null) {
            throw new IllegalArgumentException(
                    "Movie " + movieid + " has no ratings in " + RATINGS_FILE);
        }
        int nonRaterCount = allUsers.size() - targetRaters.size();
        if (nonRaterCount <= 0) {
            // With no non-raters the denominator (!x and y) / !x would be 0/0.
            throw new IllegalStateException(
                    "No user in " + USERS_FILE + " is a non-rater of movie " + movieid);
        }

        ArrayList<Scored> ranked = new ArrayList<Scored>();
        for (Map.Entry<String, java.util.Set<Integer>> entry : ratersByMovie.entrySet()) {
            // Exclude the target explicitly: it scores Infinity against itself and can
            // tie with other movies, so its sorted position is not reliable.
            if (entry.getKey().equals(movieid)) {
                continue;
            }
            java.util.Set<Integer> raters = entry.getValue();
            int both = countShared(targetRaters, raters);
            int onlyOther = raters.size() - both;
            double withTarget = (double) both / targetRaters.size();
            double withoutTarget = (double) onlyOther / nonRaterCount;
            // both + onlyOther >= 1, so this is never 0/0; it is Infinity when onlyOther == 0.
            ranked.add(new Scored(entry.getKey(), withTarget / withoutTarget));
        }
        java.util.Collections.sort(ranked);

        StringBuilder row = new StringBuilder(movieid);
        int shown = Math.min(TOP_N, ranked.size());
        for (int i = 0; i < shown; i++) {
            Scored hit = ranked.get(i);
            row.append(',').append(hit.movieId).append(',').append(hit.score);
        }
        System.out.print(row.toString());
    }

    /**
     * Parses the ratings file into a map from movie ID to the set of users who rated it.
     * Blank lines are skipped; duplicate (user, movie) rows collapse into one entry.
     */
    private static Map<String, java.util.Set<Integer>> loadRaters(String path)
            throws java.io.IOException {
        Map<String, java.util.Set<Integer>> ratersByMovie =
                new HashMap<String, java.util.Set<Integer>>();
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().length() == 0) {
                    continue;
                }
                String[] fields = line.split(",", -1);
                if (fields.length < 2) {
                    throw new IllegalArgumentException("Malformed ratings row: " + line);
                }
                Integer user = Integer.valueOf(fields[0].trim());
                String movie = fields[1].trim();
                java.util.Set<Integer> raters = ratersByMovie.get(movie);
                if (raters == null) {
                    raters = new java.util.HashSet<Integer>();
                    ratersByMovie.put(movie, raters);
                }
                raters.add(user);
            }
        } finally {
            reader.close();
        }
        return ratersByMovie;
    }

    /** Reads the distinct user numbers (first CSV column) from the users file, skipping blank lines. */
    private static java.util.Set<Integer> loadUsers(String path) throws java.io.IOException {
        java.util.Set<Integer> users = new java.util.HashSet<Integer>();
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().length() == 0) {
                    continue;
                }
                String[] fields = line.split(",", -1);
                users.add(Integer.valueOf(fields[0].trim()));
            }
        } finally {
            reader.close();
        }
        return users;
    }

    /** Returns how many users appear in both sets, probing the larger set with the smaller. */
    private static int countShared(java.util.Set<Integer> first, java.util.Set<Integer> second) {
        java.util.Set<Integer> smaller = first.size() <= second.size() ? first : second;
        java.util.Set<Integer> larger = smaller == first ? second : first;
        int shared = 0;
        for (Integer user : smaller) {
            if (larger.contains(user)) {
                shared++;
            }
        }
        return shared;
    }

    /** A candidate movie with its score; orders by score descending, then by movie ID. */
    private static final class Scored implements Comparable<Scored> {
        final String movieId;
        final double score;

        Scored(String movieId, double score) {
            this.movieId = movieId;
            this.score = score;
        }

        @Override
        public int compareTo(Scored other) {
            int byScore = Double.compare(other.score, score);
            // The ID tie-break (plain string order) keeps output independent of HashMap order.
            return byScore != 0 ? byScore : movieId.compareTo(other.movieId);
        }
    }
}


你可能感兴趣的:(推荐系统)