A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
该数据可以看作好友,例如:A有B,C,D,F,E,O好友;B有A,C,E,K好友,以此类推;
求两两之间有共同好友,及他俩的共同好友都是谁,例如:A和B之间共同好友是:C、E
编码思路:
第一步是可以把好友当作key,value是拥有key好友的用户,例如:拥有好友B的是:A,F,J,E用户
第二步在第一步结果后,双重for循环进行两两之间进行拼接,这样就可以得出正确结果
具体代码实现:
第一步:
package com.zsy.mr.commonfriend;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class commonFriendStepOne {
static class commonFriendStepOneMapper extends Mapper{
Text k = new Text();
Text v = new Text();
@Override
protected void map(LongWritable key, Text value, Mapper.Context context)
throws IOException, InterruptedException {
//通过过冒号分割
String[] splits = value.toString().split(":");
//获取拥有好友的用户名
String name = splits[0];
//获取该用户下的好友列表
String[] friends = StringUtils.isNotBlank(splits[1])? splits[1].split(","):null;
if(friends != null) {
//循环好友,好友当作key,拥有好友的用户名当作value
for (String friend : friends) {
k.set(friend);
v.set(name);
context.write(k, v);
}
}
}
}
static class commonFriendStepOneReducer extends Reducer{
Text v = new Text();
@Override
protected void reduce(Text key, Iterable values, Reducer.Context context)
throws IOException, InterruptedException {
List resultList = new ArrayList();//实际生产代码不建议用list接收,应该是直接处理掉
//处理数据,该数据是拥有key好友的所有用户
for (Text value : values) {
resultList.add(value.toString());
}
v.set(StringUtils.join(resultList, ","));
context.write(key, v);
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
/*conf.set("mapreduce.framework.name", "yarn");
conf.set("yarn.resoucemanger.hostname", "hadoop01");*/
Job job = Job.getInstance(conf);
job.setJarByClass(commonFriendStepOne.class);
//指定本业务job要使用的业务类
job.setMapperClass(commonFriendStepOneMapper.class);
job.setReducerClass(commonFriendStepOneReducer.class);
//指定mapper输出的k v类型 如果map的输出和reduce的输出一样,只需要设置输出即可
//job.setMapOutputKeyClass(Text.class);
//job.setMapOutputValueClass(IntWritable.class);
//指定最终输出kv类型(reduce输出类型)
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
//指定job的输入文件所在目录
FileInputFormat.setInputPaths(job, new Path(args[0]));
//指定job的输出结果目录
FileOutputFormat.setOutputPath(job, new Path(args[1]));
//将job中配置的相关参数,以及job所有的java类所在 的jar包,提交给yarn去运行
//job.submit();无结果返回,建议不使用它
boolean res = job.waitForCompletion(true);
System.exit(res?0:1);
}
}
结果:
第二步:
代码实现
package com.zsy.mr.commonfriend;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class commonFriendStepTwo {
static class commonFriendStepTwoMapper extends Mapper{
Text k = new Text();
Text v = new Text();
@Override
protected void map(LongWritable key, Text value, Mapper.Context context)
throws IOException, InterruptedException {
String[] splits = value.toString().split("\t");
//获取好友
String friend = splits[0];
//获取拥有该好友所有的用户信息
String[] names = splits[1].split(",");
//进行排序,防止计算数据重复,例如:A-B和B-A其实一个对
Arrays.sort(names);
//进行双重for循环
for (int i = 0; i < names.length-1; i++) {
String string = names[i];
for (int j = i+1; j < names.length; j++) {
String string2 = names[j];
k.set(string+"-"+string2);
v.set(friend);
context.write(k, v);
}
}
}
}
static class commonFriendStepTwoReducer extends Reducer{
Text k = new Text();
@Override
protected void reduce(Text key, Iterable value, Reducer.Context context)
throws IOException, InterruptedException {
List resultList = new ArrayList();//实际生产代码不建议用list接收,应该是直接处理掉
for (Text text : value) {
resultList.add(text.toString());
}
k.set(key.toString()+":"+ StringUtils.join(resultList,","));
context.write(k, NullWritable.get());
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
/*conf.set("mapreduce.framework.name", "yarn");
conf.set("yarn.resoucemanger.hostname", "hadoop01");*/
Job job = Job.getInstance(conf);
job.setJarByClass(commonFriendStepTwo.class);
//指定本业务job要使用的业务类
job.setMapperClass(commonFriendStepTwoMapper.class);
job.setReducerClass(commonFriendStepTwoReducer.class);
//指定mapper输出的k v类型 如果map的输出和reduce的输出一样,只需要设置输出即可
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
//指定最终输出kv类型(reduce输出类型)
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
//指定job的输入文件所在目录
FileInputFormat.setInputPaths(job, new Path(args[0]));
//指定job的输出结果目录
FileOutputFormat.setOutputPath(job, new Path(args[1]));
//将job中配置的相关参数,以及job所有的java类所在 的jar包,提交给yarn去运行
//job.submit();无结果返回,建议不使用它
boolean res = job.waitForCompletion(true);
System.exit(res?0:1);
}
}
结果:
这样就可以找到正确结果