Implementing a Recommendation Algorithm in MapReduce (co-occurrence matrix × rating matrix = recommendations)

This article covers item recommendation based on ItemCF (item-based collaborative filtering).

1. The two most commonly used collaborative filtering algorithms

a. Item-based collaborative filtering (ItemCF)
Item-based collaborative filtering measures the similarity between items from the ratings users give to different items, and makes recommendations based on that item-to-item similarity. In short: recommend to a user items similar to the items they liked before.

b. User-based collaborative filtering (UserCF)
User-based collaborative filtering measures the similarity between users from the ratings different users give to items, and makes recommendations based on that user-to-user similarity. In short: recommend to a user the items liked by other users with similar tastes.
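In the matrix form this article implements (as the title says), the ItemCF prediction is a single multiplication: for each user u and candidate item j,

score(u, j) = Σi cooc(i, j) × rating(u, i)

that is, the item co-occurrence matrix times the user's rating vector. A concrete numeric instance is worked out after the two matrices below.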

Data source:

Rating matrix:

As an example, consider whether user 3 is interested in item 102. The rows below are user 3's ratings for every item; a nonzero rating means user 3 liked the item, and 0 means no rating:

User ID    Item ID    Rating
3          101        2
3          102        0
3          103        0
3          104        4
3          105        4.5
3          106        0
3          107        5

Co-occurrence matrix:
The co-occurrence matrix records the strength of association between items; each weight is derived from all users' ratings of all items. For example, the first row below means that 3 users liked both item 101 and item 102.

Item ID1    Item ID2    Association weight
101         102         3
102         102         4
103         102         4
104         102         2
105         102         2
106         102         1
107         102         1
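With these two tables, user 3's predicted interest in item 102 is the dot product of item 102's co-occurrence column with user 3's rating vector. Working it out from the numbers above:

score(user 3, item 102) = 3×2 + 4×0 + 4×0 + 2×4 + 2×4.5 + 1×0 + 1×5 = 28

This is the co-occurrence-matrix × rating-matrix multiplication from the title, computed for a single cell; the MapReduce jobs below compute it for every (user, item) pair.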


2. The MapReduce code consists of 5 parts:

Part 1: remove duplicate records from the raw data (a hypothetical sample of the input format follows the code):
package com.laoxiao.tuijian;

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Step 1: deduplicate the raw input records.
 * @author root
 *
 */
public class Step1 {

	
	public static boolean run(Configuration config, Map<String, String> paths){
		try {
			FileSystem fs =FileSystem.get(config);
			Job job =Job.getInstance(config);
			job.setJobName("step1");
			job.setJarByClass(Step1.class);
			job.setMapperClass(Step1_Mapper.class);
			job.setReducerClass(Step1_Reducer.class);
//			
			job.setMapOutputKeyClass(Text.class);
			job.setMapOutputValueClass(NullWritable.class);
			
			
			
			FileInputFormat.addInputPath(job, new Path(paths.get("Step1Input")));
			Path outpath=new Path(paths.get("Step1Output"));
			if(fs.exists(outpath)){
				fs.delete(outpath,true);
			}
			FileOutputFormat.setOutputPath(job, outpath);
			
			boolean f= job.waitForCompletion(true);
			return f;
		} catch (Exception e) {
			e.printStackTrace();
		}
		return false;
	}
	
	 static class Step1_Mapper extends Mapper<LongWritable, Text, Text, NullWritable>{

		protected void map(LongWritable key, Text value,
				Context context)
				throws IOException, InterruptedException {
			if(key.get()!=0){ // skip the header line (its byte offset is 0)
				context.write(value, NullWritable.get());
			}
		}
	}
	
	 
	 static class Step1_Reducer extends Reducer<Text, NullWritable, Text, NullWritable>{

			protected void reduce(Text key, Iterable<NullWritable> i,
					Context context)
					throws IOException, InterruptedException {
				context.write(key, NullWritable.get()); // identical records collapse into a single key
			}
		}
}
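Judging from how the Step2 mapper below parses its input with split(","), each raw line is a CSV record of the form itemId,userId,action, with a header line that the mapper above skips via the byte-offset-0 check. A hypothetical sample (these ids and action names are illustrative, not from the original data):

item_id,user_id,action
i100,u13,click
i100,u13,click
i160,u13,click

After Step 1, the two identical i100,u13,click lines collapse into one record.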
Part 2: group by user to produce each user's item score vector (the user-to-item rating matrix)
package com.laoxiao.tuijian;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Step 2: group the records by user and sum each item's score per user,
 * producing the user's item score vector. Sample output (user TAB itemId:score,...):
u13	i160:1,
u14	i25:1,i223:1,
u16	i252:1,
u21	i266:1,
u24	i64:1,i218:1,i185:1,
u26	i276:1,i201:1,i348:1,i321:1,i136:1,
 * @author root
 *
 */
public class Step2 {

	
	public static boolean run(Configuration config, Map<String, String> paths){
		try {
//			config.set("mapred.jar", "C:\\Users\\Administrator\\Desktop\\wc.jar");
			FileSystem fs =FileSystem.get(config);
			Job job =Job.getInstance(config);
			job.setJobName("step2");
			job.setJarByClass(StartRun.class);
			job.setMapperClass(Step2_Mapper.class);
			job.setReducerClass(Step2_Reducer.class);
//			
			job.setMapOutputKeyClass(Text.class);
			job.setMapOutputValueClass(Text.class);
			
			
			
			FileInputFormat.addInputPath(job, new Path(paths.get("Step2Input")));
			Path outpath=new Path(paths.get("Step2Output"));
			if(fs.exists(outpath)){
				fs.delete(outpath,true);
			}
			FileOutputFormat.setOutputPath(job, outpath);
			
			boolean f= job.waitForCompletion(true);
			return f;
		} catch (Exception e) {
			e.printStackTrace();
		}
		return false;
	}
	
	 static class Step2_Mapper extends Mapper<LongWritable, Text, Text, Text>{

		protected void map(LongWritable key, Text value,
				Context context)
				throws IOException, InterruptedException {
			String[] tokens = value.toString().split(",");
			String item = tokens[0];   // item id
			String user = tokens[1];   // user id
			String action = tokens[2]; // action type (click, collect, ...)
			Text k = new Text(user);
			Integer rv = StartRun.R.get(action); // score weight for this action type, from the driver's map
			if(rv != null){ // skip action types that have no weight in the map
				Text v = new Text(item + ":" + rv.intValue());
				context.write(k, v); // group by user id; value is itemId:score
			}
		}
	}
	
	 
	 static class Step2_Reducer extends Reducer<Text, Text, Text, Text>{

			protected void reduce(Text key, Iterable<Text> i,
					Context context)
					throws IOException, InterruptedException {
				Map<String, Integer> r = new HashMap<String, Integer>(); // accumulates scores for repeated items

				for(Text value : i){ // iterate over this user's (item, score) pairs
					String[] vs = value.toString().split(":");
					String item = vs[0];                      // item id
					Integer action = Integer.parseInt(vs[1]); // score of this occurrence
					action = (r.get(item) == null ? 0 : r.get(item)) + action;
					r.put(item, action);
				}
				StringBuffer sb = new StringBuffer();
				for(Entry<String, Integer> entry : r.entrySet()){
					sb.append(entry.getKey() + ":" + entry.getValue().intValue() + ",");
				}

				context.write(key, new Text(sb.toString()));
			}
		}
}
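Step2_Mapper reads StartRun.R, a static map in the driver class that assigns a score weight to each action type; the driver itself is not shown in this article. A minimal sketch of what it might contain (the action names and weights are assumptions, not the original values):

package com.laoxiao.tuijian;

import java.util.HashMap;
import java.util.Map;

public class StartRun {
	// action type -> score weight (hypothetical values; the original mapping is not shown)
	public static Map<String, Integer> R = new HashMap<String, Integer>();
	static {
		R.put("click", 1);
		R.put("collect", 2);
		R.put("cart", 3);
		R.put("alipay", 4);
	}
}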
Part 3: build the item co-occurrence matrix. For every pair of items that appear together in one user's score vector (including an item paired with itself), the mapper emits a count of 1, and the reducer sums these counts across all users; diagonal entries such as i100:i100 record how many users interacted with that item.
package com.laoxiao.tuijian;

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Step 3: count the item pair combinations in each user's score vector to
 * build the item co-occurrence matrix. Sample output (itemA:itemB TAB count):
i100:i100	3
i100:i105	1
i100:i106	1
i100:i109	1
i100:i114	1
i100:i124	1
 * @author root
 *
 */
public class Step3 {
	 private final static Text K = new Text();                // reused output key "itemA:itemB"
     private final static IntWritable V = new IntWritable(1); // reused output value
	
	public static boolean run(Configuration config, Map<String, String> paths){
		try {
			FileSystem fs =FileSystem.get(config);
			Job job =Job.getInstance(config);
			job.setJobName("step3");
			job.setJarByClass(StartRun.class);
			job.setMapperClass(Step3_Mapper.class);
			job.setReducerClass(Step3_Reducer.class);
			job.setCombinerClass(Step3_Reducer.class);
//			
			job.setMapOutputKeyClass(Text.class);
			job.setMapOutputValueClass(IntWritable.class);
			
			
			
			FileInputFormat.addInputPath(job, new Path(paths.get("Step3Input")));
			Path outpath=new Path(paths.get("Step3Output"));
			if(fs.exists(outpath)){
				fs.delete(outpath,true);
			}
			FileOutputFormat.setOutputPath(job, outpath);
			
			boolean f= job.waitForCompletion(true);
			return f;
		} catch (Exception e) {
			e.printStackTrace();
		}
		return false;
	}
	
	 static class Step3_Mapper extends Mapper<LongWritable, Text, Text, IntWritable>{

		protected void map(LongWritable key, Text value,
				Context context)
				throws IOException, InterruptedException {
			String[] tokens = value.toString().split("\t");
			String[] items = tokens[1].split(","); // each element is itemId:score from Step 2's output
			for (int i = 0; i < items.length; i++) {
				String itemA = items[i].split(":")[0]; // item id (the score part is not needed here)
				for (int j = 0; j < items.length; j++) {
					String itemB = items[j].split(":")[0];
					K.set(itemA + ":" + itemB); // this pair co-occurs once within this user's vector
					context.write(K, V);
				}
			}
		}
	}
	
	 
	 static class Step3_Reducer extends Reducer<Text, IntWritable, Text, IntWritable>{

			protected void reduce(Text key, Iterable<IntWritable> i,
					Context context)
					throws IOException, InterruptedException {
				int sum =0;
				for(IntWritable v :i ){
					sum =sum+v.get();
				}
				V.set(sum);
				context.write(key, V);
			}
		}
	 
}
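For completeness, here is a minimal sketch of a driver main method chaining the three jobs shown so far, feeding each step's output directory into the next step's input. Only the path keys (Step1Input, Step1Output, ...) come from the code above; the concrete HDFS paths are hypothetical:

public static void main(String[] args) {
	Configuration config = new Configuration();
	Map<String, String> paths = new HashMap<String, String>();
	paths.put("Step1Input", "/data/tuijian/input");    // hypothetical location of the raw CSV
	paths.put("Step1Output", "/data/tuijian/step1");
	paths.put("Step2Input", paths.get("Step1Output")); // deduplicated records feed Step 2
	paths.put("Step2Output", "/data/tuijian/step2");
	paths.put("Step3Input", paths.get("Step2Output")); // user score vectors feed Step 3
	paths.put("Step3Output", "/data/tuijian/step3");
	if (Step1.run(config, paths) && Step2.run(config, paths)) {
		Step3.run(config, paths);
	}
}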

Part 4: multiply the co-occurrence matrix by the rating matrix
(to be continued)
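The original article ends here. As a hedged illustration only (a common way to structure this step, not the author's pending code): a fourth job can join each item's co-occurrence row against every user's score for that item and emit partial products, leaving a fifth job to sum the partial products per (user, item) pair. The mapper below distinguishes the two inputs by their directory name, assuming the step2 and step3 output directories from the driver sketch above:

package com.laoxiao.tuijian;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class Step4 {

	static class Step4_Mapper extends Mapper<LongWritable, Text, Text, Text> {
		private String flag; // name of the directory this split was read from

		protected void setup(Context context) {
			FileSplit split = (FileSplit) context.getInputSplit();
			flag = split.getPath().getParent().getName(); // "step2" or "step3" (assumed directory names)
		}

		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			String[] tokens = value.toString().split("\t");
			if (flag.equals("step3")) {
				// co-occurrence line "itemA:itemB TAB count": key by itemA
				String[] pair = tokens[0].split(":");
				context.write(new Text(pair[0]), new Text("A:" + pair[1] + "," + tokens[1]));
			} else {
				// score line "user TAB item:score,item:score,...": key by item
				for (String itemScore : tokens[1].split(",")) {
					String[] v = itemScore.split(":");
					context.write(new Text(v[0]), new Text("B:" + tokens[0] + "," + v[1]));
				}
			}
		}
	}

	static class Step4_Reducer extends Reducer<Text, Text, Text, Text> {
		protected void reduce(Text key, Iterable<Text> values, Context context)
				throws IOException, InterruptedException {
			// key is one item id; collect its co-occurrence row and the users who scored it
			Map<String, Integer> cooc = new HashMap<String, Integer>();   // other item -> co-occurrence count
			Map<String, Integer> scores = new HashMap<String, Integer>(); // user -> score on this item
			for (Text v : values) {
				String[] t = v.toString().substring(2).split(",");
				if (v.toString().startsWith("A:")) {
					cooc.put(t[0], Integer.parseInt(t[1]));
				} else {
					scores.put(t[0], Integer.parseInt(t[1]));
				}
			}
			// each user's score on this item times its co-occurrence with itemB is one
			// partial product for (user, itemB); a fifth job sums these per pair
			for (Entry<String, Integer> u : scores.entrySet()) {
				for (Entry<String, Integer> c : cooc.entrySet()) {
					context.write(new Text(u.getKey() + ":" + c.getKey()),
							new Text(String.valueOf(u.getValue() * c.getValue())));
				}
			}
		}
	}
}

The job wiring (adding both the step2 and step3 output directories via FileInputFormat.addInputPath, with Text map output key and value types) mirrors the run() methods of the previous steps and is omitted.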
