User-based collaborative filtering (UserCF): recommend to a user the items liked by other users whose interests are similar to that user's.
Example scenario
Users: A, B, C, D, E, F
Items: 1, 2, 3, 4, 5, 6
Behaviors and scores: click = 1.0, search = 3.0, favorite = 5.0, purchase = 10.0
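If a user performs several actions on the same item, the scores are simply added up (for example, a click followed by a favorite on the same item would score 1.0 + 5.0 = 6.0); this accumulation is exactly what the step-1 reducer below does.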
The user behavior list is as follows
User Item Behavior
A 1 click
A 3 favorite
A 4 search
B 2 search
B 5 search
C 1 favorite
C 6 purchase
D 1 purchase
D 5 favorite
E 3 favorite
E 4 click
F 2 favorite
F 3 search
F 6 click
Algorithm steps
1. Compute the user-item score matrix from the user behavior list.
1 2 3 4 5 6
A 1 0 5 3 0 0
B 0 3 0 0 3 0
C 5 0 0 0 0 10
D 10 0 0 0 5 0
E 0 0 5 1 0 0
F 0 5 3 0 0 1
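For example, user A clicked item 1 (1 point), favorited item 3 (5 points) and searched item 4 (3 points), which gives row A = (1, 0, 5, 3, 0, 0).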
2. Compute the user-user similarity matrix from the score matrix.
Every pair of users is compared; the similarity used here is the cosine of the two users' score vectors, cos(u, v) = u·v / (|u|·|v|), which is what the step-2 code below computes.
A B C D E F
A 1 0 0.08 0.15 0.93 0.43
B 0 1 0 0.32 0 0.6
C 0.08 0 1 0.4 0 0.15
D 0.15 0.32 0.4 1 0 0
E 0.93 0 0 0 1 0.5
F 0.43 0.6 0.15 0 0.5 1
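For example, for A = (1, 0, 5, 3, 0, 0) and E = (0, 0, 5, 1, 0, 0): cos(A, E) = (5×5 + 3×1) / (√35 × √26) = 28 / 30.2 ≈ 0.93, which is the value shown in the table.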
3. Similarity matrix × score matrix = recommendation list.
1 2 3 4 5 6
A 2.9 2.2 11.0 3.9 0.8 1.2
B 3.2 6.0 1.8 0 4.6 0.6
C 9.1 0.8 0.9 0.2 2.0 10.2
D 12.2 1.0 0.8 0.5 6.0 4.0
E 0.9 2.5 11.2 3.8 0 0.5
F 1.2 6.8 7.7 1.82 1.8 2.5
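For example, A's recommendation score for item 2 is the similarity-weighted sum of the other users' scores for item 2: sim(A, B)×3 + sim(A, F)×5 = 0×3 + 0.43×5 ≈ 2.2 (every other user has a 0 score for item 2).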
4. In the recommendation list, zero out the items the user has already interacted with (i.e. the items that already have a score in the score matrix).
1 2 3 4 5 6
A 0 2.2 0 0 0.8 1.2
B 3.2 0 1.8 0 0 0.6
C 0 0.8 0.9 0.2 2.0 0
D 0 1.0 0.8 0.5 0 4.0
E 0.9 2.5 0 0 0 0.5
F 1.2 0 0 1.82 1.8 0
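Reading off the remaining non-zero entries row by row gives the recommendations; for user A, for example, item 2 (2.2) would be recommended first, then item 6 (1.2), then item 5 (0.8).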
Project layout:
The input file is as follows
MapReduce steps
1. Build the score matrix from the user behavior list
Input: user ID, item ID, score
Output: user ID (row), item ID (column), score
Code:
mapper1
package step1;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * @author liyijie
 * @date 2018-05-13 22:36:18
 * @email [email protected]
 *
 * Build the user-item score matrix from the user behavior list.
 */
public class Mapper1 extends Mapper<LongWritable, Text, Text, Text> {
private Text outKey = new Text();
private Text outValue = new Text();
/**
 * key: line offset
 * value: e.g. "A,1,1", meaning user A clicked item 1 (score 1)
 * */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] values = value.toString().split(",");
String userID = values[0];
String itemID = values[1];
String score = values[2];
//key: user ID (row of the score matrix)  value: itemID_score (column_value)
outKey.set(userID);
outValue.set(itemID+"_"+score);
context.write(outKey, outValue);
}
}
reducer1
package step1;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * @author liyijie
 * @date 2018-05-13 22:56:28
 * @email [email protected]
 *
 * Build the user-item score matrix from the user behavior list.
 */
public class Reducer1 extends Reducer<Text, Text, Text, Text> {
private Text outKey = new Text();
private Text outValue = new Text();
//key: user ID (row)  value: itemID_score (column_value)
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
String userID = key.toString();
//itemID -> accumulated score
Map<String, Integer> map = new HashMap<>();
//each value has the form itemID_score
for(Text value:values){
String[] split = value.toString().split("_");
String itemID = split[0];
String score = split[1];
if(map.get(itemID)==null){
map.put(itemID, Integer.parseInt(score));
}else{
Integer preScore = map.get(itemID);
map.put(itemID, preScore+Integer.parseInt(score));
}
}
StringBuilder sb = new StringBuilder();
for(Map.Entry<String, Integer> entry:map.entrySet()){
String itemID = entry.getKey();
String score = String.valueOf(entry.getValue());
sb.append(itemID).append("_").append(score).append(",");
}
String line = null;
if(sb.toString().endsWith(",")){
line = sb.substring(0, sb.length()-1);
}
//key: user ID (row)  value: itemID_score,itemID_score,... (one full row of the score matrix)
outKey.set(userID);
outValue.set(line);
context.write(outKey,outValue);
}
}
mr1
package step1;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * @author liyijie
 * @date 2018-05-13 23:07:13
 * @email [email protected]
 *
 * Build the user-item score matrix from the user behavior list.
 */
public class MR1 {
private static String inputPath = "/UserCF/step1_input/actionList.txt";
private static String outputPath = "/UserCF/step1_output";
private static String hdfs = "hdfs://node1:9000";
public int run(){
try {
Configuration conf=new Configuration();
conf.set("fs.defaultFS", hdfs);
Job job = Job.getInstance(conf,"step1");
//configure the job's mapper and reducer classes
job.setJarByClass(MR1.class);
job.setJar("F:\\eclipseworkspace\\UserCF\\UserCF.jar");
job.setMapperClass(Mapper1.class);
job.setReducerClass(Reducer1.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileSystem fs = FileSystem.get(conf);
Path inpath = new Path(inputPath);
if(fs.exists(inpath)){
FileInputFormat.addInputPath(job,inpath);
}else{
System.out.println(inpath);
System.out.println("does not exist");
}
Path outpath = new Path(outputPath);
fs.delete(outpath,true);
FileOutputFormat.setOutputPath(job, outpath);
return job.waitForCompletion(true)?1:-1;
} catch (ClassNotFoundException | InterruptedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return -1;
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
int result = -1;
result = new MR1().run();
if(result==1){
System.out.println("step1 finished successfully");
}else if(result==-1){
System.out.println("step1 failed");
}
}
}
Output:
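With the sample behavior list above, part-r-00000 should contain one tab-separated line per user, roughly like the following (the order of lines and of the itemID_score pairs may differ, since it depends on the shuffle and on HashMap iteration order):
A	1_1,3_5,4_3
B	2_3,5_3
C	1_5,6_10
D	1_10,5_5
E	3_5,4_1
F	2_5,3_3,6_1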
2. Build the user-user similarity matrix from the score matrix
Input: output of step 1
Cache: output of step 1
(the input and the cache are the same file)
Output: user ID (row), user ID (column), similarity
Code:
mapper2
package step2;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * @author liyijie
 * @date 2018-05-13 23:43:51
 * @email [email protected]
 *
 * Build the user-user similarity matrix from the score matrix.
 */
public class Mapper2 extends Mapper<LongWritable, Text, Text, Text> {
private Text outKey = new Text();
private Text outValue = new Text();
//cached (right-hand) matrix, one line per user
private List<String> cacheList = new ArrayList<String>();
private DecimalFormat df = new DecimalFormat("0.00");
/**Runs once, before any call to map().
 *
 * Reads the matrix from the distributed cache into a Java list.
 */
@Override
protected void setup(Context context)throws IOException, InterruptedException {
super.setup(context);
FileReader fr = new FileReader("itemUserScore1");
BufferedReader br = new BufferedReader(fr);
//cached (right-hand) matrix: the step-1 score matrix
//each line: userID \t itemID_score,itemID_score,... (one row per user)
String line = null;
while((line=br.readLine())!=null){
cacheList.add(line);
}
fr.close();
br.close();
}
/**
 * key: line offset
 * value: one row of the score matrix: userID \t itemID_score,itemID_score,...
 * */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] rowAndLine_matrix1 = value.toString().split("\t");
//row of the (left-hand) score matrix: user ID
String row_matrix1 = rowAndLine_matrix1[0];
//column_values: itemID_score
String[] cloumn_value_array_matrix1 = rowAndLine_matrix1[1].split(",");
//vector length of the current user's score vector (first factor of the denominator)
double denominator1 = 0;
for(String cloumn_value:cloumn_value_array_matrix1){
String score = cloumn_value.split("_")[1];
denominator1 += Double.valueOf(score)*Double.valueOf(score);
}
denominator1 = Math.sqrt(denominator1);
for(String line:cacheList){
String[] rowAndLine_matrix2 = line.toString().split("\t");
//one line of the cached (right-hand) matrix
//format: userID \t itemID_score,itemID_score,...
String cloumn_matrix2 = rowAndLine_matrix2[0];
String[] row_value_array_matrix2 = rowAndLine_matrix2[1].split(",");
//vector length of the cached user's score vector (second factor of the denominator)
double denominator2 = 0;
for(String column_value:row_value_array_matrix2){
String score = column_value.split("_")[1];
denominator2 += Double.valueOf(score)*Double.valueOf(score);
}
denominator2 = Math.sqrt(denominator2);
//dot product of the two users' score vectors (the numerator)
int numerator = 0;
//iterate over the columns (itemID_score) of the current user's row
for(String cloumn_value_matrix1:cloumn_value_array_matrix1){
String cloumn_matrix1 = cloumn_value_matrix1.split("_")[0];
String value_matrix1 = cloumn_value_matrix1.split("_")[1];
//iterate over the columns of the cached user's row
for(String cloumn_value_matrix2:row_value_array_matrix2){
if(cloumn_value_matrix2.startsWith(cloumn_matrix1+"_")){
String value_matrix2 = cloumn_value_matrix2.split("_")[1];
//multiply the two matching scores and accumulate
numerator+= Integer.valueOf(value_matrix1)*Integer.valueOf(value_matrix2);
}
}
}
double cos = numerator/(denominator1*denominator2);
if(cos == 0){
continue;
}
//cos is one element of the similarity matrix, at row row_matrix1 and column cloumn_matrix2
outKey.set(row_matrix1);
outValue.set(cloumn_matrix2+"_"+df.format(cos));
//output: key = user ID (row)  value = otherUserID_similarity (column_value)
context.write(outKey, outValue);
}
}
}
reducer2
package step2;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * @author liyijie
 * @date 2018-05-13 23:43:59
 * @email [email protected]
 *
 * Build the user-user similarity matrix from the score matrix.
 */
public class Reducer2 extends Reducer<Text, Text, Text, Text>{
private Text outKey = new Text();
private Text outValue = new Text();
// key: user ID (row)  value: otherUserID_similarity (column_value)
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
StringBuilder sb = new StringBuilder();
for(Text text:values){
sb.append(text+",");
}
String line = null;
if(sb.toString().endsWith(",")){
line = sb.substring(0, sb.length()-1);
}
outKey.set(key);
outValue.set(line);
context.write(outKey,outValue);
}
}
mr2
package step2;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * @author liyijie
 * @date 2018-05-13 23:44:07
 * @email [email protected]
 *
 * Build the user-user similarity matrix from the score matrix.
 */
public class MR2 {
private static String inputPath = "/UserCF/step1_output";
private static String outputPath = "/UserCF/step2_output";
//use the step-1 output (the score matrix) as a distributed-cache file
private static String cache="/UserCF/step1_output/part-r-00000";
private static String hdfs = "hdfs://node1:9000";
public int run(){
try {
Configuration conf=new Configuration();
conf.set("fs.defaultFS", hdfs);
Job job = Job.getInstance(conf,"step2");
//enable symlinks for the distributed cache if they are not already enabled
FileSystem.enableSymlinks();
//check whether symlinks are actually enabled before relying on them
boolean areSymlinksEnabled = FileSystem.areSymlinksEnabled();
System.out.println(areSymlinksEnabled);
//add the distributed-cache file, symlinked into the task working directory
job.addCacheArchive(new URI(cache+"#itemUserScore1"));
//configure the job's mapper and reducer classes
job.setJarByClass(MR2.class);
job.setJar("F:\\eclipseworkspace\\UserCF\\UserCF.jar");
job.setMapperClass(Mapper2.class);
job.setReducerClass(Reducer2.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileSystem fs = FileSystem.get(conf);
Path inpath = new Path(inputPath);
if(fs.exists(inpath)){
FileInputFormat.addInputPath(job,inpath);
}else{
System.out.println(inpath);
System.out.println("does not exist");
}
Path outpath = new Path(outputPath);
fs.delete(outpath,true);
FileOutputFormat.setOutputPath(job, outpath);
return job.waitForCompletion(true)?1:-1;
} catch (ClassNotFoundException | InterruptedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (URISyntaxException e) {
e.printStackTrace();
}
return -1;
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
int result = -1;
result = new MR2().run();
if(result==1){
System.out.println("step2 finished successfully");
}else if(result==-1){
System.out.println("step2 failed");
}
}
}
Output:
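Each output line holds one row of the similarity matrix; pairs with similarity 0 are skipped by the mapper and values carry two decimals, so with the sample data the file should look roughly like this (ordering may differ):
A	A_1.00,C_0.08,D_0.15,E_0.93,F_0.43
B	B_1.00,D_0.32,F_0.60
...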
3. Transpose the score matrix
Input: output of step 1
Output: item ID (row), user ID (column), score
Code:
mapper3
package step3;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * @author liyijie
 * @date 2018-05-13 22:36:18
 * @email [email protected]
 *
 * Transpose the score matrix.
 */
public class Mapper3 extends Mapper<LongWritable, Text, Text, Text> {
private Text outKey = new Text();
private Text outValue = new Text();
/**
 * key: line offset
 * value: one row of the step-1 score matrix: userID \t itemID_score,itemID_score,...
 * */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] rowAndLine = value.toString().split("\t");
//row of the score matrix: user ID
String userID = rowAndLine[0];
//column_values: itemID_score
String[] lines = rowAndLine[1].split(",");
for(int i = 0 ; i < lines.length ; i++){
String[] itemAndScore = lines[i].split("_");
String itemID = itemAndScore[0];
String score = itemAndScore[1];
//transposed matrix: key = item ID (new row), value = userID_score (new column_value)
outKey.set(itemID);
outValue.set(userID+"_"+score);
context.write(outKey, outValue);
}
}
}
reducer3
package step3;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * @author liyijie
 * @date 2018-05-13 22:56:28
 * @email [email protected]
 *
 * Transpose the score matrix.
 */
public class Reducer3 extends Reducer<Text, Text, Text, Text> {
private Text outKey = new Text();
private Text outValue = new Text();
//key: item ID (row of the transposed matrix)  value: userID_score (one column_value per record)
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
StringBuilder sb = new StringBuilder();
//each value: userID_score
for(Text text:values){
sb.append(text).append(",");
}
String line = null;
if(sb.toString().endsWith(",")){
line = sb.substring(0, sb.length()-1);
}
outKey.set(key);
outValue.set(line);
context.write(outKey,outValue);
}
}
mr3
package step3;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * @author liyijie
 * @date 2018-05-13 23:07:13
 * @email [email protected]
 *
 * Transpose the score matrix.
 */
public class MR3 {
private static String inputPath = "/UserCF/step1_output";
private static String outputPath = "/UserCF/step3_output";
private static String hdfs = "hdfs://node1:9000";
public int run(){
try {
Configuration conf=new Configuration();
conf.set("fs.defaultFS", hdfs);
Job job = Job.getInstance(conf,"step3");
//configure the job's mapper and reducer classes
job.setJarByClass(MR3.class);
job.setJar("F:\\eclipseworkspace\\UserCF\\UserCF.jar");
job.setMapperClass(Mapper3.class);
job.setReducerClass(Reducer3.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileSystem fs = FileSystem.get(conf);
Path inpath = new Path(inputPath);
if(fs.exists(inpath)){
FileInputFormat.addInputPath(job,inpath);
}else{
System.out.println(inpath);
System.out.println("does not exist");
}
Path outpath = new Path(outputPath);
fs.delete(outpath,true);
FileOutputFormat.setOutputPath(job, outpath);
return job.waitForCompletion(true)?1:-1;
} catch (ClassNotFoundException | InterruptedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return -1;
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
int result = -1;
result = new MR3().run();
if(result==1){
System.out.println("step3 finished successfully");
}else if(result==-1){
System.out.println("step3 failed");
}
}
}
Output:
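The transposed matrix has one tab-separated line per item; with the sample data it should look roughly like this (ordering may differ):
1	A_1,C_5,D_10
2	B_3,F_5
3	A_5,E_5,F_3
4	A_3,E_1
5	B_3,D_5
6	C_10,F_1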
4. Multiply the user-user similarity matrix by the score matrix
Input: output of step 2
Cache: output of step 3
Output: user ID (row), item ID (column), score
Code:
mapper4
package step4;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * @author liyijie
 * @date 2018-05-13 23:43:51
 * @email [email protected]
 *
 * Multiply the user-user similarity matrix by the score matrix (transposed in step 3).
 */
public class Mapper4 extends Mapper<LongWritable, Text, Text, Text> {
private Text outKey = new Text();
private Text outValue = new Text();
private List<String> cacheList = new ArrayList<String>();
private DecimalFormat df = new DecimalFormat("0.00");
/**Runs once, before any call to map().
 *
 * Reads the matrix from the distributed cache into a Java list.
 */
@Override
protected void setup(Context context)throws IOException, InterruptedException {
super.setup(context);
FileReader fr = new FileReader("itemUserScore2");
BufferedReader br = new BufferedReader(fr);
//cached (right-hand) matrix: the transposed score matrix from step 3
//each line: itemID \t userID_score,userID_score,... (one row per item)
String line = null;
while((line=br.readLine())!=null){
cacheList.add(line);
}
fr.close();
br.close();
}
/**
 * key: line offset
 * value: one row of the similarity matrix: userID \t userID_similarity,userID_similarity,...
 * */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] rowAndLine_matrix1 = value.toString().split("\t");
//row of the similarity matrix: user ID
String row_matrix1 = rowAndLine_matrix1[0];
//column_values: userID_similarity
String[] cloumn_value_array_matrix1 = rowAndLine_matrix1[1].split(",");
for(String line:cacheList){
String[] rowAndLine_matrix2 = line.toString().split("\t");
//one line of the cached (score) matrix
//format: itemID \t userID_score,userID_score,...
String cloumn_matrix2 = rowAndLine_matrix2[0];
String[] row_value_array_matrix2 = rowAndLine_matrix2[1].split(",");
//one element of the product matrix (the weighted recommendation score)
double result = 0;
//iterate over the columns (userID_similarity) of the similarity-matrix row
for(String cloumn_value_matrix1:cloumn_value_array_matrix1){
String cloumn_matrix1 = cloumn_value_matrix1.split("_")[0];
String value_matrix1 = cloumn_value_matrix1.split("_")[1];
//iterate over the columns (userID_score) of the cached item row
for(String cloumn_value_matrix2:row_value_array_matrix2){
if(cloumn_value_matrix2.startsWith(cloumn_matrix1+"_")){
String value_matrix2 = cloumn_value_matrix2.split("_")[1];
//multiply similarity by score and accumulate
result+= Double.valueOf(value_matrix1)*Double.valueOf(value_matrix2);
}
}
}
if(result==0){
continue;
}
//result is one element of the recommendation matrix, at row row_matrix1 (user) and column cloumn_matrix2 (item)
outKey.set(row_matrix1);
outValue.set(cloumn_matrix2+"_"+df.format(result));
//output: key = user ID (row)  value = itemID_score (column_value)
context.write(outKey, outValue);
}
}
}
reducer4
package step4;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * @author liyijie
 * @date 2018-05-13 23:43:59
 * @email [email protected]
 *
 * Multiply the user-user similarity matrix by the score matrix (transposed in step 3).
 */
public class Reducer4 extends Reducer<Text, Text, Text, Text>{
private Text outKey = new Text();
private Text outValue = new Text();
// key: user ID (row)  value: itemID_score (column_value)
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
StringBuilder sb = new StringBuilder();
for(Text text:values){
sb.append(text+",");
}
String line = null;
if(sb.toString().endsWith(",")){
line = sb.substring(0, sb.length()-1);
}
outKey.set(key);
outValue.set(line);
context.write(outKey,outValue);
}
}
mr4
package step4;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * @author liyijie
 * @date 2018-05-13 23:44:07
 * @email [email protected]
 *
 * Multiply the user-user similarity matrix by the score matrix (transposed in step 3).
 */
public class MR4 {
private static String inputPath = "/UserCF/step2_output";
private static String outputPath = "/UserCF/step4_output";
//use the transposed score matrix produced in step 3 as a distributed-cache file
private static String cache="/UserCF/step3_output/part-r-00000";
private static String hdfs = "hdfs://node1:9000";
public int run(){
try {
Configuration conf=new Configuration();
conf.set("fs.defaultFS", hdfs);
Job job = Job.getInstance(conf,"step4");
//enable symlinks for the distributed cache if they are not already enabled
FileSystem.enableSymlinks();
//check whether symlinks are actually enabled before relying on them
boolean areSymlinksEnabled = FileSystem.areSymlinksEnabled();
System.out.println(areSymlinksEnabled);
//add the distributed-cache file, symlinked into the task working directory
job.addCacheArchive(new URI(cache+"#itemUserScore2"));
//configure the job's mapper and reducer classes
job.setJarByClass(MR4.class);
job.setJar("F:\\eclipseworkspace\\UserCF\\UserCF.jar");
job.setMapperClass(Mapper4.class);
job.setReducerClass(Reducer4.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileSystem fs = FileSystem.get(conf);
Path inpath = new Path(inputPath);
if(fs.exists(inpath)){
FileInputFormat.addInputPath(job,inpath);
}else{
System.out.println(inpath);
System.out.println("does not exist");
}
Path outpath = new Path(outputPath);
fs.delete(outpath,true);
FileOutputFormat.setOutputPath(job, outpath);
return job.waitForCompletion(true)?1:-1;
} catch (ClassNotFoundException | InterruptedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (URISyntaxException e) {
e.printStackTrace();
}
return -1;
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
int result = -1;
result = new MR4().run();
if(result==1){
System.out.println("step4 finished successfully");
}else if(result==-1){
System.out.println("step4 failed");
}
}
}
Output:
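Each line is one row of the recommendation matrix (before zeroing); with the sample data, user A's row should come out roughly as follows, matching the hand-computed table above up to rounding (pair order may differ):
A	1_2.90,2_2.15,3_10.94,4_3.93,5_0.75,6_1.23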
5. Using the score matrix, zero out the items in the step-4 output that the user has already interacted with
Input: output of step 4
Cache: output of step 1
Output: user ID (row), item ID (column), score (the final recommendation list)
Code:
mapper5
package step5;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * @author liyijie
 * @date 2018-05-13 23:43:51
 * @email [email protected]
 *
 * Using the score matrix, drop the items in the step-4 output that the user has already interacted with.
 */
public class Mapper5 extends Mapper<LongWritable, Text, Text, Text> {
private Text outKey = new Text();
private Text outValue = new Text();
private List<String> cacheList = new ArrayList<String>();
/**Runs once, before any call to map().
 *
 * Reads the matrix from the distributed cache into a Java list.
 */
@Override
protected void setup(Context context)throws IOException, InterruptedException {
super.setup(context);
FileReader fr = new FileReader("itemUserScore3");
BufferedReader br = new BufferedReader(fr);
//cached matrix: the step-1 score matrix
//each line: userID \t itemID_score,itemID_score,... (one row per user)
String line = null;
while((line=br.readLine())!=null){
cacheList.add(line);
}
fr.close();
br.close();
}
/**
 * key: line offset
 * value: one row of the step-4 output: userID \t itemID_score,itemID_score,...
 * */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] rowAndLine_matrix1 = value.toString().split("\t");
//row of the step-4 recommendation matrix: user ID
String user_matrix1 = rowAndLine_matrix1[0];
//column_values: itemID_score
String[] item_score_array_matrix1 = rowAndLine_matrix1[1].split(",");
for(String line:cacheList){
String[] rowAndLine_matrix2 = line.toString().split("\t");
//one line of the cached (score) matrix
//format: userID \t itemID_score,itemID_score,...
String user_matrix2 = rowAndLine_matrix2[0];
String[] item_score_array_matrix2 = rowAndLine_matrix2[1].split(",");
//only the cached row belonging to the same user is relevant
if(user_matrix1.equals(user_matrix2)){
//iterate over the items recommended to this user in step 4
for(String item_score_matrix1:item_score_array_matrix1){
boolean flag = false;
String item_matrix1 = item_score_matrix1.split("_")[0];
String score_matrix1 = item_score_matrix1.split("_")[1];
//check whether the user already has a score for this item
for(String item_score_matrix2:item_score_array_matrix2){
String item_matrix2 = item_score_matrix2.split("_")[0];
if(item_matrix1.equals(item_matrix2)){
flag = true;
}
}
//the user has not interacted with this item, so keep it in the recommendation list
if(flag==false){
outKey.set(user_matrix1);
outValue.set(item_matrix1+"_"+score_matrix1);
//output: key = user ID (row)  value = itemID_score (column_value)
context.write(outKey, outValue);
}
}
}
}
}
}
reducer5
package step5;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * @author liyijie
 * @date 2018-05-13 23:43:59
 * @email [email protected]
 *
 * Using the score matrix, drop the items in the step-4 output that the user has already interacted with.
 */
public class Reducer5 extends Reducer<Text, Text, Text, Text>{
private Text outKey = new Text();
private Text outValue = new Text();
// key: user ID (row)  value: itemID_score (column_value)
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
StringBuilder sb = new StringBuilder();
for(Text text:values){
sb.append(text+",");
}
String line = null;
if(sb.toString().endsWith(",")){
line = sb.substring(0, sb.length()-1);
}
outKey.set(key);
outValue.set(line);
context.write(outKey,outValue);
}
}
mr5
package step5;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * @author liyijie
 * @date 2018-05-13 23:44:07
 * @email [email protected]
 *
 * Using the score matrix, drop the items in the step-4 output that the user has already interacted with.
 */
public class MR5 {
private static String inputPath = "/UserCF/step4_output";
private static String outputPath = "/UserCF/step5_output";
//use the step-1 output (the score matrix) as a distributed-cache file
private static String cache="/UserCF/step1_output/part-r-00000";
private static String hdfs = "hdfs://node1:9000";
public int run(){
try {
Configuration conf=new Configuration();
conf.set("fs.defaultFS", hdfs);
Job job = Job.getInstance(conf,"step5");
//enable symlinks for the distributed cache if they are not already enabled
FileSystem.enableSymlinks();
//check whether symlinks are actually enabled before relying on them
boolean areSymlinksEnabled = FileSystem.areSymlinksEnabled();
System.out.println(areSymlinksEnabled);
//add the distributed-cache file, symlinked into the task working directory
job.addCacheArchive(new URI(cache+"#itemUserScore3"));
//configure the job's mapper and reducer classes
job.setJarByClass(MR5.class);
job.setJar("F:\\eclipseworkspace\\UserCF\\UserCF.jar");
job.setMapperClass(Mapper5.class);
job.setReducerClass(Reducer5.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileSystem fs = FileSystem.get(conf);
Path inpath = new Path(inputPath);
if(fs.exists(inpath)){
FileInputFormat.addInputPath(job,inpath);
}else{
System.out.println(inpath);
System.out.println("does not exist");
}
Path outpath = new Path(outputPath);
fs.delete(outpath,true);
FileOutputFormat.setOutputPath(job, outpath);
return job.waitForCompletion(true)?1:-1;
} catch (ClassNotFoundException | InterruptedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (URISyntaxException e) {
e.printStackTrace();
}
return -1;
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
int result = -1;
result = new MR5().run();
if(result==1){
System.out.println("step5 finished successfully");
}else if(result==-1){
System.out.println("step5 failed");
}
}
}
Output (final recommendation list):
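With the sample data, user A's final line should keep only the items A has not interacted with, roughly:
A	2_2.15,5_0.75,6_1.23
Sorting each user's remaining itemID_score pairs by score then gives that user's ranked recommendation list.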