以下是 QQ 的好友列表数据:冒号前是一个用户,冒号后是该用户的所有好友(数据中的好友
关系是单向的)。
A:B,C,D,F,E,O
B:A,C,E,K
C:A,B,D,E,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
求出哪些人两两之间有共同好友,及他俩的共同好友都有谁?
package com.hadoop.common_friends_step1;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Step 1 mapper: inverts the one-way friend list so that each friend becomes the key.
 *
 * <p>Each input line has the form {@code user:friend1,friend2,...}. For every friend
 * listed after the colon this mapper emits {@code (friend, user)}, so the reducer
 * receives, per friend, all users that list that friend.
 *
 * <p>Type parameters: input key is the {@link LongWritable} supplied by the input format,
 * input value is the line text; output key (K2) and value (V2) are both {@link Text}.
 */
public class Step1Mapper extends Mapper<LongWritable, Text, Text, Text> {

    /**
     * Splits one friend-list line and emits one {@code (friend, user)} pair per friend.
     *
     * @param key     key supplied by the input format (not used)
     * @param value   one line of the friend list, {@code user:friend1,friend2,...}
     * @param context sink for the emitted {@code (friend, user)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // 1. Split on ':'. The left part is the user, which becomes V2.
        String[] parts = value.toString().split(":");
        if (parts.length < 2) {
            // Blank line, header text, or a user with no friends: nothing to emit.
            return;
        }
        // Loop-invariant: every pair emitted for this line carries the same user.
        Text user = new Text(parts[0]);

        // 2. Split the right part on ','. Each friend becomes a K2.
        for (String friend : parts[1].split(",")) {
            if (friend.isEmpty()) {
                // Skip empty tokens such as those produced by "A:,B".
                continue;
            }
            // 3. Emit (K2 = friend, V2 = user).
            context.write(new Text(friend), user);
        }
    }
}
package com.hadoop.common_friends_step1;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Step 1 reducer: for one friend, joins every user that lists this friend into a single
 * {@code '-'}-terminated string and emits {@code (joinedUsers, friend)}.
 *
 * <p>Every user is followed by {@code "-"}, so the emitted key ends with a trailing
 * {@code "-"} (for example {@code "I-K-B-"}). This format is kept unchanged because the
 * step 2 mapper splits this field on {@code "-"}.
 */
public class Step1Reducer extends Reducer<Text, Text, Text, Text> {

    /**
     * Concatenates all users for one friend and writes the result.
     *
     * @param key     the friend (K2); written out unchanged as V3
     * @param values  all users whose friend list contains {@code key}
     * @param context sink for the emitted {@code (joinedUsers, friend)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // 1. Walk the values and append each one followed by "-" to build K3.
        //    StringBuilder suffices: the buffer never leaves this method.
        StringBuilder users = new StringBuilder();
        for (Text value : values) {
            users.append(value.toString()).append("-");
        }
        // 2. The incoming K2 (the friend) is V3.
        // 3. Emit (K3 = joined users, V3 = friend).
        context.write(new Text(users.toString()), new Text(key));
    }
}
package com.hadoop.common_friends_step1;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Driver for step 1 of the common-friends computation.
 *
 * <p>Configures a job that reads the friend list with {@link TextInputFormat}, runs
 * {@link Step1Mapper} and {@link Step1Reducer}, and writes the result with
 * {@link TextOutputFormat}.
 *
 * <p>Usage: {@code Step1Main [inputPath [outputPath]]}. When an argument is omitted the
 * original hard-coded local path is used, so running without arguments behaves as before.
 */
public class Step1Main extends Configured implements Tool {

    /** Input path used when no first argument is supplied. */
    private static final String DEFAULT_INPUT_PATH = "file:///E:\\input\\friends_step_input";

    /** Output path used when no second argument is supplied. */
    private static final String DEFAULT_OUTPUT_PATH = "file:///E:\\out\\friends_step_out";

    /**
     * Builds, submits and waits for the step 1 job.
     *
     * @param strings optional arguments: {@code [0]} input path, {@code [1]} output path
     * @return {@code 0} if the job completed successfully, {@code 1} otherwise
     * @throws Exception if the job cannot be configured or submitted
     */
    @Override
    public int run(String[] strings) throws Exception {
        String inputPath =
                strings != null && strings.length > 0 ? strings[0] : DEFAULT_INPUT_PATH;
        String outputPath =
                strings != null && strings.length > 1 ? strings[1] : DEFAULT_OUTPUT_PATH;

        // 1. Obtain the job object.
        Job job = Job.getInstance(super.getConf(), "map_reduce_step1");
        // Lets Hadoop locate the jar containing the job classes when run on a cluster.
        job.setJarByClass(Step1Main.class);

        // 2. Configure the job.
        // Step 1: input format and input path.
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path(inputPath));

        // Step 2: mapper class and its output types.
        job.setMapperClass(Step1Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Steps 3-6: nothing is configured here, so the framework defaults apply.

        // Step 7: reducer class and its output types.
        job.setReducerClass(Step1Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Output format and output path.
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path(outputPath));

        // 3. Wait for the job to finish.
        boolean succeeded = job.waitForCompletion(true);
        return succeeded ? 0 : 1;
    }

    /**
     * Entry point: runs the job through {@link ToolRunner} and exits with its status code.
     *
     * @param args command-line arguments, passed on to {@link #run(String[])}
     * @throws Exception if the job fails to run
     */
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        int exitCode = ToolRunner.run(configuration, new Step1Main(), args);
        System.exit(exitCode);
    }
}
package com.hadoop.common_friends_step2;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.Arrays;
/**
 * Step 2 mapper: turns each step 1 output line into one record per pair of users.
 *
 * <p>Example:
 * <pre>
 *   K1   V1
 *   0    I-K-B-G-F-H-O-C-D-&lt;TAB&gt;A
 *
 *   K2   V2
 *   B-C  A
 *   B-D  A
 *   ...
 * </pre>
 *
 * <p>The users are sorted before pairing so that a given pair always yields the same
 * key regardless of the order in which the users appeared (e.g. always {@code "A-C"},
 * never {@code "C-A"}).
 */
public class Step2Mapper extends Mapper<LongWritable, Text, Text, Text> {

    /**
     * Emits {@code (userI-userJ, friend)} for every pair of users on the line.
     *
     * @param key     key supplied by the input format (not used)
     * @param value   one line of the form {@code user1-user2-...-<TAB>friend}
     * @param context sink for the emitted {@code (pair, friend)} records
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // 1. Split the line on tab. The second field is V2 (the shared friend).
        String[] fields = value.toString().split("\t");
        if (fields.length < 2) {
            // Blank or malformed line without a tab-separated friend: nothing to emit.
            return;
        }
        // Loop-invariant: every pair emitted for this line carries the same friend.
        Text friend = new Text(fields[1]);

        // 2. Split the first field on '-' to get the users. String.split drops the
        //    trailing empty token caused by the trailing '-'.
        String[] users = fields[0].split("-");

        // 3. Sort so each pair is produced in a canonical order: A-E-C -> A C E.
        Arrays.sort(users);

        // 4. Combine the users pairwise to build K2.
        for (int i = 0; i < users.length; i++) {
            for (int j = i + 1; j < users.length; j++) {
                // 5. Emit (K2 = "userI-userJ", V2 = friend).
                context.write(new Text(users[i] + "-" + users[j]), friend);
            }
        }
    }
}
package com.hadoop.common_friends_step2;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Step 2 reducer: for one user pair, joins all of their common friends into a single
 * {@code '-'}-terminated string and emits {@code (pair, joinedFriends)}.
 *
 * <p>Every friend is followed by {@code "-"}, so the emitted value ends with a trailing
 * {@code "-"} (for example {@code "C-E-"}).
 */
public class Step2Reduce extends Reducer<Text, Text, Text, Text> {

    /**
     * Concatenates all common friends of one user pair and writes the result.
     *
     * @param key     the user pair (K2); written out unchanged as K3
     * @param values  the friends emitted for this pair by the mapper
     * @param context sink for the emitted {@code (pair, joinedFriends)} record
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // 1. The incoming K2 is used unchanged as K3.
        // 2. Walk the values and append each one followed by "-" to build V3.
        //    StringBuilder suffices: the buffer never leaves this method.
        StringBuilder friends = new StringBuilder();
        for (Text value : values) {
            friends.append(value.toString()).append("-");
        }
        // 3. Emit (K3 = pair, V3 = joined friends).
        context.write(key, new Text(friends.toString()));
    }
}
package com.hadoop.common_friends_step2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Driver for step 2 of the common-friends computation.
 *
 * <p>Configures a job that reads the step 1 output with {@link TextInputFormat}, runs
 * {@link Step2Mapper} and {@link Step2Reduce}, and writes the result with
 * {@link TextOutputFormat}.
 *
 * <p>Usage: {@code Step2Main [inputPath [outputPath]]}. When an argument is omitted the
 * original hard-coded local path is used, so running without arguments behaves as before.
 */
public class Step2Main extends Configured implements Tool {

    /** Input path (the step 1 output directory) used when no first argument is supplied. */
    private static final String DEFAULT_INPUT_PATH = "file:///E:\\out\\friends_step_out";

    /** Output path used when no second argument is supplied. */
    private static final String DEFAULT_OUTPUT_PATH = "file:///E:\\out\\friends_step2_out";

    /**
     * Builds, submits and waits for the step 2 job.
     *
     * @param strings optional arguments: {@code [0]} input path, {@code [1]} output path
     * @return {@code 0} if the job completed successfully, {@code 1} otherwise
     * @throws Exception if the job cannot be configured or submitted
     */
    @Override
    public int run(String[] strings) throws Exception {
        String inputPath =
                strings != null && strings.length > 0 ? strings[0] : DEFAULT_INPUT_PATH;
        String outputPath =
                strings != null && strings.length > 1 ? strings[1] : DEFAULT_OUTPUT_PATH;

        // 1. Obtain the job object.
        Job job = Job.getInstance(super.getConf(), "friends_step2");
        // Lets Hadoop locate the jar containing the job classes when run on a cluster.
        job.setJarByClass(Step2Main.class);

        // 2. Configure the job.
        // Step 1: input format and input path.
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path(inputPath));

        // Step 2: mapper class and its output types.
        job.setMapperClass(Step2Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Steps 3-6: nothing is configured here, so the framework defaults apply.

        // Step 7: reducer class and its output types.
        job.setReducerClass(Step2Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Output format and output path.
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path(outputPath));

        // 3. Wait for the job to finish.
        boolean succeeded = job.waitForCompletion(true);
        return succeeded ? 0 : 1;
    }

    /**
     * Entry point: runs the job through {@link ToolRunner} and exits with its status code.
     *
     * @param args command-line arguments, passed on to {@link #run(String[])}
     * @throws Exception if the job fails to run
     */
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        int exitCode = ToolRunner.run(configuration, new Step2Main(), args);
        System.exit(exitCode);
    }
}
将第一次的结果,作为第二次的处理数据,经过两次处理得出结果:
A-B C-E-
A-C B-E-D-
A-D F-E-
A-E B-D-C-
A-F O-E-D-B-C-
A-G C-F-D-E-
A-H O-D-C-E-
A-I O-
A-J B-O-
A-K C-D-
A-L D-E-F-
A-M E-F-
B-C A-E-
B-D E-A-
B-E C-
B-F E-C-A-
B-G E-A-C-
B-H C-A-E-
B-I A-
B-K C-A-
B-L E-
B-M E-
B-O A-
C-D E-A-
C-E D-B-
C-F E-B-A-D-
C-G D-E-A-
C-H A-E-D-
C-I A-
C-J B-
C-K A-D-
C-L D-E-
C-M E-
C-O A-I-
D-E L-
D-F A-E-
D-G E-F-A-
D-H E-A-
D-I A-
D-K A-
D-L E-F-
D-M F-E-
D-O A-
E-F M-C-D-B-
E-G D-C-
E-H C-D-
E-J B-
E-K C-D-
E-L D-
F-G E-D-C-A-
F-H A-D-O-C-E-
F-I O-A-
F-J B-O-
F-K D-C-A-
F-L E-D-
F-M E-
F-O A-
G-H D-C-E-A-
G-I A-
G-K D-A-C-
G-L F-D-E-
G-M E-F-
G-O A-
H-I A-O-
H-J O-
H-K A-D-C-
H-L E-D-
H-M E-
H-O A-
I-J O-
I-K A-
I-O A-
K-L D-
K-O A-
L-M F-E-