数据:
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
第一阶段计算结果:
A I,K,C,B,G,F,H,O,D
B A,F,J,E
C A,E,B,H,F,G,K
D G,C,K,A,L,F,E,H
E G,M,L,H,A,F,B,D
F L,M,D,C,G,A
G M
H O
I O,C
J O
K B
L D,E
M E,F
O A,H,I,J,F
第二阶段计算结果:
A-B: C,E
A-C: D,F
A-D: E,F
A-E: B,C,D
A-F: B,C,D,E,O
A-G: C,D,E,F
A-H: C,D,E,O
A-I: O
A-J: B,O
A-K: C,D
A-L: D,E,F
A-M: E,F
B-C: A
B-D: A,E
B-E: C
B-F: A,C,E
B-G: A,C,E
B-H: A,C,E
B-I: A
B-K: A,C
B-L: E
B-M: E
B-O: A
C-D: A,F
C-E: D
C-F: A,D
C-G: A,D,F
C-H: A,D
C-I: A
C-K: A,D
C-L: D,F
C-M: F
C-O: A,I
D-E: L
D-F: A,E
D-G: A,E,F
D-H: A,E
D-I: A
D-K: A
D-L: E,F
D-M: E,F
D-O: A
E-F: B,C,D,M
E-G: C,D
E-H: C,D
E-J: B
E-K: C,D
E-L: D
F-G: A,C,D,E
F-H: A,C,D,E,O
F-I: A,O
F-J: B,O
F-K: A,C,D
F-L: D,E
F-M: E
F-O: A
G-H: A,C,D,E
G-I: A
G-K: A,C,D
G-L: D,E,F
G-M: E,F
G-O: A
H-I: A,O
H-J: O
H-K: A,C,D
H-L: D,E
H-M: E
H-O: A
I-J: O
I-K: A
I-O: A
K-L: D
K-O: A
L-M: E,F
代码:
第一阶段Mapper
/**
* @Author zhaoxin
* @Email [email protected]
* @Description //第一阶段的Map
* 遍历原始文件中每行<所有朋友>信息
* 遍历“朋友”集合,以每个“朋友”为键,原来的“人”为值 即输出<朋友,人>
* @Date 2019/1/2
**/
/**
 * Stage 1 Mapper.
 *
 * <p>Each input line has the form {@code person:friend1,friend2,...}. For every
 * friend in the list this mapper emits {@code <friend, person>}, so the stage-1
 * reducer can collect, per friend, everyone who lists that friend.
 */
public class MyMapper1 extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        // "A:B,C,D" -> person before the colon, comma-separated friends after it.
        String[] personFriends = line.split(":");
        if (personFriends.length < 2) {
            return; // skip blank or malformed lines instead of throwing AIOOBE
        }
        String person = personFriends[0];
        for (String friend : personFriends[1].split(",")) {
            // Key = friend, value = the person who has that friend.
            context.write(new Text(friend), new Text(person));
        }
    }
}
第一阶段Reduce
/**
* @Author zhaoxin
* @Email [email protected]
* @Description 第一阶段 Reduce
* 对所有传过来的<朋友,list(人)>进行拼接,输出<朋友,拥有这名朋友的所有人>
* @Date 2019/1/2
**/
/**
 * Stage 1 Reducer.
 *
 * <p>Receives {@code <friend, list(person)>} and joins the people into a single
 * comma-separated string, emitting {@code <friend, everyone-who-has-this-friend>}.
 */
public class MyReduce1 extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // StringBuilder (unsynchronized) suffices: no shared state within one call.
        StringBuilder joined = new StringBuilder();
        for (Text person : values) {
            if (joined.length() > 0) {
                joined.append(','); // separator only between items — no trailing comma to trim
            }
            // toString() copies the value; Hadoop reuses the Text instance across iterations.
            joined.append(person.toString());
        }
        context.write(key, new Text(joined.toString()));
    }
}
第二阶段Mapper
/**
* @Author zhaoxin
* @Email [email protected]
* @Description //第二阶段Mapper 将第一阶段产生的数据作为原数据
* 1.将上一阶段reduce输出的<朋友,拥有这名朋友的所有人>信息中的 “拥有这名朋友的所有人”进行排序 ,以防出现B-C C-B这样的重复
* 2.将“拥有这名朋友的所有人”进行两两配对,并将配对后的字符串当做键,“朋友”当做值输出,即输出<人-人,共同朋友>
* @Date 2019/1/2
**/
public class MyMapper2 extends Mapper {
@Override
protected void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException {
String line = value.toString();
String[] friend_persons = line.split("\t");
String friend = friend_persons[0];
String[] persons = friend_persons[1].split(",");
Arrays.sort(persons); //排序
//两两配对
for (int i=0;i 中的“共同好友”进行拼接 最后输出<人-人,两人的所有共同好友>
* @Date 2019/1/2
**/
/**
 * Stage 2 Reducer.
 *
 * <p>Receives {@code <person1-person2, list(common friend)>}, deduplicates the
 * friends, and emits {@code <person1-person2, all common friends>} joined with
 * commas.
 */
public class MyReduce2 extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // LinkedHashSet deduplicates while keeping a deterministic (insertion)
        // order, unlike the arbitrary iteration order of HashSet.
        Set<String> commonFriends = new LinkedHashSet<>();
        for (Text friend : values) {
            // toString() copies the value; Hadoop reuses the Text instance.
            commonFriends.add(friend.toString());
        }
        // String.join handles the empty case safely (no deleteCharAt on length-1).
        context.write(key, new Text(String.join(",", commonFriends)));
    }
}
驱动类
/**
* @Author zhaoxin
* @Email [email protected]
* @Description //TODO
* @Date 2019/1/2
**/
/**
 * Driver: chains the two MapReduce jobs of the common-friends computation.
 *
 * <p>Stage 1 turns {@code person:friends} lines into {@code <friend, owners>};
 * stage 2 reads that output and produces {@code <personA-personB, common friends>}.
 * Stage 2 only runs if stage 1 succeeded, since it consumes stage 1's output dir.
 */
public class MyDriver {
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        System.setProperty("HADOOP_USER_NAME", "root");
        Configuration configuration = new Configuration();
        // "fs.defaultFS" is the current key; "fs.default.name" is deprecated.
        configuration.set("fs.defaultFS", "hdfs://192.168.136.128:9000/");

        // ---- Stage 1: <friend, person> -> <friend, everyone who has this friend> ----
        Job job1 = Job.getInstance(configuration);
        job1.setJarByClass(MyDriver.class);
        job1.setMapperClass(MyMapper1.class);
        job1.setReducerClass(MyReduce1.class);
        job1.setMapOutputKeyClass(Text.class);
        job1.setMapOutputValueClass(Text.class);
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(Text.class);
        job1.setInputFormatClass(TextInputFormat.class);
        job1.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job1, new Path("/zxx2/qq/pubuser.txt"));
        FileOutputFormat.setOutputPath(job1, new Path("/zxx2/qq/pub_out1"));
        // Abort if stage 1 fails: stage 2 would otherwise read a missing/partial dir.
        if (!job1.waitForCompletion(true)) {
            System.err.println("Stage 1 job failed; aborting.");
            System.exit(1);
        }

        // ---- Stage 2: <person-person, common friend> -> joined common friends ----
        Job job2 = Job.getInstance(configuration);
        job2.setJarByClass(MyDriver.class);
        job2.setMapperClass(MyMapper2.class);
        job2.setReducerClass(MyReduce2.class);
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        job2.setInputFormatClass(TextInputFormat.class);
        job2.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job2, new Path("/zxx2/qq/pub_out1"));
        FileOutputFormat.setOutputPath(job2, new Path("/zxx2/qq/pub_out2"));
        // Conventional exit codes: 0 on success, non-zero on failure.
        System.exit(job2.waitForCompletion(true) ? 0 : 2);
    }
}