Raw data: each person's friend list
A:B,C,D,F,E,O
B:A,C,E,K
C:E,A,D,L
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
Output data: the common friends of every pair of people
A,B E
A,C E
A,D F,E
A,E C,B,D
A,F C,B,D,E,O
A,G F,E,D
A,H O,D,E
A,I O
A,J O
A,L F,E
A,M F,E
B,A C
B,D A,E
B,E C
B,F C,A
B,G E,A,C
B,K C,A
C,A D
C,B A,E
C,D E,L,A
C,E D,L
C,F A,D
C,G E,A,D
C,H E,D
C,K D,A
C,L D,E
C,M E
D,E L
D,G E,A,F
D,K A
D,M F
E,F M
F,B E
F,C E
F,D A,E
F,E C,B,D
F,G E,A
F,H E
F,K A
F,L E
F,M E
G,A C
G,E C,D
G,F D,C
G,K A,C
H,A C
H,B A,E,C
H,C A
H,D E,A
H,E C,D
H,F A,D,C,O
H,G D,A,E,C
H,I A,O
H,J O
H,K C,A
H,O A
I,B A
I,C A
I,D A
I,F A,O
I,G A
I,J O
I,K A
J,A B
J,E B
J,F O,B
K,A C,D
K,E D,C
K,F C,D
K,G D
K,H D
L,A D
L,B E
L,D F,E
L,E D
L,F D
L,G E,D,F
L,H E,D
L,K D
L,M E,F
M,B E
M,D E
M,G E,F
M,H E
O,B A
O,C A
O,D A
O,F A
O,G A
O,I A
O,K A
This solution applies MapReduce iteratively: two MapReduce jobs run back to back, with the output of the first feeding the second.
mapreduce1:
For every friend x, collect all the people who have x in their friend list (i.e., invert the raw adjacency lists); a plain-Java sketch of this step follows the output listing below.
Input file
A:B,C,D,F,E,O
B:A,C,E,K
C:E,A,D,L
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
Output file
A H,O,I,C,B,F,D,G,K
B J,A,F,E
C H,B,G,K,A,F,E
D C,L,K,A,H,G,F,E
E A,F,C,L,M,H,B,D,G
F A,L,D,M,G
G M
H O
I O
J O
K B
L C,D,E
M E,F
O A,H,I,J,F
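To make the inversion concrete, here is a minimal plain-Java sketch of what mapreduce1 computes (not part of the Hadoop job; the class name InvertSketch and the two hard-coded input lines are just for illustration):

import java.util.*;

// Sketch: invert "owner -> friends" into "friend -> owners" in memory.
public class InvertSketch {
    public static void main(String[] args) {
        List<String> lines = Arrays.asList("A:B,C,D,F,E,O", "B:A,C,E,K"); // first two input lines
        Map<String, List<String>> whoHasFriend = new TreeMap<>();
        for (String line : lines) {
            String[] split = line.split(":");
            for (String friend : split[1].split(","))
                whoHasFriend.computeIfAbsent(friend, k -> new ArrayList<>()).add(split[0]);
        }
        // Prints {A=[B], B=[A], C=[A, B], D=[A], E=[A, B], F=[A], K=[B], O=[A]}
        System.out.println(whoHasFriend);
    }
}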
mapreduce2:
For every pair of people that share a friend, collect all their common friends; the mapper turns each line of job 1's output into one record per pair of names, and a standalone sketch of that pair loop follows the output listing below.
Input file
A H,O,I,C,B,F,D,G,K
B J,A,F,E
C H,B,G,K,A,F,E
D C,L,K,A,H,G,F,E
E A,F,C,L,M,H,B,D,G
F A,L,D,M,G
G M
H O
I O
J O
K B
L C,D,E
M E,F
O A,H,I,J,F
Output file
A,B E
A,C E
A,D F,E
A,E C,B,D
A,F C,B,D,E,O
A,G F,E,D
A,H O,D,E
A,I O
A,J O
A,L F,E
A,M F,E
B,A C
B,D A,E
B,E C
B,F C,A
B,G E,A,C
B,K C,A
C,A D
C,B A,E
C,D E,L,A
C,E D,L
C,F A,D
C,G E,A,D
C,H E,D
C,K D,A
C,L D,E
C,M E
D,E L
D,G E,A,F
D,K A
D,M F
E,F M
F,B E
F,C E
F,D A,E
F,E C,B,D
F,G E,A
F,H E
F,K A
F,L E
F,M E
G,A C
G,E C,D
G,F D,C
G,K A,C
H,A C
H,B A,E,C
H,C A
H,D E,A
H,E C,D
H,F A,D,C,O
H,G D,A,E,C
H,I A,O
H,J O
H,K C,A
H,O A
I,B A
I,C A
I,D A
I,F A,O
I,G A
I,J O
I,K A
J,A B
J,E B
J,F O,B
K,A C,D
K,E D,C
K,F C,D
K,G D
K,H D
L,A D
L,B E
L,D F,E
L,E D
L,F D
L,G E,D,F
L,H E,D
L,K D
L,M E,F
M,B E
M,D E
M,G E,F
M,H E
O,B A
O,C A
O,D A
O,F A
O,G A
O,I A
O,K A
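The pair generation in mapreduce2 is the one subtle step: for a line "x<TAB>p1,...,pn" of job 1's output, the mapper emits the friend x once for each of the n(n-1)/2 pairs of names. A standalone sketch (the class name PairSketch and the hard-coded line are illustrative):

public class PairSketch {
    public static void main(String[] args) {
        String commonFriend = "F";                // key of one job-1 output line
        String[] owners = "A,L,D,M,G".split(","); // its value list
        for (int i = 0; i < owners.length - 1; i++)
            for (int j = i + 1; j < owners.length; j++)
                System.out.println(owners[i] + "," + owners[j] + "\t" + commonFriend);
    }
}

Note that the pairs keep whatever order the reducer's value list happened to have, which is why both A,B and B,A appear as keys in the final output above.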
Program source code:
JobSubmitter (the main class)
package mr1;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Created by hadoop on 17-2-20.
 */
public class JobSubmitter {
    public static void main(String[] args) throws Exception {
        // Get a Job instance
        Configuration conf = new Configuration();
        Job job1 = Job.getInstance(conf);
        // Specify the jar that contains this job
        job1.setJarByClass(JobSubmitter.class);
        // Set the mapper and reducer classes for job1
        job1.setMapperClass(map1.class);
        job1.setReducerClass(reduce1.class);
        // Set the key/value types for both stages
        job1.setMapOutputKeyClass(Text.class);
        job1.setMapOutputValueClass(Text.class);
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(Text.class);
        // Set the input and output paths
        // FileInputFormat.setInputPaths(job1, "hdfs://hadoop-virtual-machine:9000/wordcount/input");
        // FileOutputFormat.setOutputPath(job1, new Path("hdfs://hadoop-virtual-machine:9000/wordcount/output2"));
        FileInputFormat.setInputPaths(job1, "/home/hadoop/input");
        FileOutputFormat.setOutputPath(job1, new Path("/home/hadoop/output"));
        // Submit the job to the Hadoop cluster and wait for it to finish
        job1.waitForCompletion(true);
        // The first MapReduce job is done; start the second one
        Configuration conf2 = new Configuration();
        Job job2 = Job.getInstance(conf2);
        job2.setJarByClass(JobSubmitter.class);
        job2.setMapperClass(map2.class);
        job2.setReducerClass(reduce2.class);
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        // job2 reads job1's output directory
        FileInputFormat.setInputPaths(job2, "/home/hadoop/output");
        FileOutputFormat.setOutputPath(job2, new Path("/home/hadoop/output2"));
        job2.waitForCompletion(true);
    }
}
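Running the two jobs with back-to-back waitForCompletion calls works, but it never checks whether job1 actually succeeded before job2 starts reading its output. A sketch of an alternative using Hadoop's JobControl API, which makes the dependency explicit (the class name ChainedSubmitter is hypothetical; job1 and job2 are assumed to be configured exactly as above):

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;

public class ChainedSubmitter {
    static void runChained(Job job1, Job job2) throws Exception {
        JobControl control = new JobControl("common-friends");
        ControlledJob cJob1 = new ControlledJob(job1.getConfiguration());
        cJob1.setJob(job1);
        ControlledJob cJob2 = new ControlledJob(job2.getConfiguration());
        cJob2.setJob(job2);
        cJob2.addDependingJob(cJob1); // job2 starts only after job1 succeeds
        control.addJob(cJob1);
        control.addJob(cJob2);
        new Thread(control).start();  // JobControl implements Runnable
        while (!control.allFinished()) {
            Thread.sleep(500);
        }
        control.stop();
    }
}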
map1
package mr1;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Shared-friend statistics: for every friend in a person's list,
 * emits (friend, person), inverting the adjacency list.
 * Created by hadoop on 17-2-20.
 */
public class map1 extends Mapper<LongWritable, Text, Text, Text> {
    // Reusable output objects, so a new Text is not allocated per record
    Text mKey = new Text();
    Text mValue = new Text();
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        /**
         * Variables:
         * line:  one input line, e.g. "A:B,C,D,F,E,O"
         * owner: the person who owns the friend list (before the colon)
         * parts: that person's friends (after the colon)
         */
        String line = value.toString();
        String[] split = line.split(":");
        String owner = split[0];
        String[] parts = split[1].split(",");
        for (String part : parts) {
            mKey.set(part);
            mValue.set(owner);
            context.write(mKey, mValue);
        }
    }
}
reduce1
package mr1;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.Iterator;
/**
 * For each friend x, concatenates all the people who have x as a friend.
 * Created by hadoop on 17-2-20.
 */
public class reduce1 extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // Join the owners into a comma-separated list, e.g. "H,O,I,C,B,F,D,G,K"
        StringBuilder friends = new StringBuilder();
        Iterator<Text> iterator = values.iterator();
        while (iterator.hasNext()) {
            friends.append(iterator.next().toString()).append(",");
        }
        // Drop the trailing comma
        friends.setLength(friends.length() - 1);
        context.write(key, new Text(friends.toString()));
    }
}
map2
package mr1;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Variables:
 * line:         one line of job 1's output, e.g. "A\tH,O,I,C,B,F,D,G,K"
 * commonFriend: the shared friend (the key of the line)
 * Owners:       the people who all have that friend
 * Created by hadoop on 17-2-20.
 */
public class map2 extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] commonFriends = line.split("\t");
        String commonFriend = commonFriends[0];
        String[] Owners = commonFriends[1].split(",");
        // Emit every pair of people that share this friend, keyed by the pair
        for (int i = 0; i < Owners.length - 1; i++) {
            for (int j = i + 1; j < Owners.length; j++) {
                context.write(new Text(Owners[i] + "," + Owners[j]), new Text(commonFriend));
            }
        }
    }
}
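As the output listings show, map2 emits pairs in whatever order the names arrive, so (A,B) and (B,A) end up as different keys with their common friends split between them. If one canonical key per pair is wanted, a small variant (a sketch; the class name map2Sorted is hypothetical) sorts the names before pairing:

package mr1;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.Arrays;

// Like map2, but sorts the owners first so each pair is always emitted
// in the same order, e.g. "A,B" and never "B,A".
public class map2Sorted extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] commonFriends = value.toString().split("\t");
        String commonFriend = commonFriends[0];
        String[] owners = commonFriends[1].split(",");
        Arrays.sort(owners); // canonical pair order
        for (int i = 0; i < owners.length - 1; i++) {
            for (int j = i + 1; j < owners.length; j++) {
                context.write(new Text(owners[i] + "," + owners[j]), new Text(commonFriend));
            }
        }
    }
}

With this mapper, all the common friends of a pair land under a single key, so the final output would contain one line per pair instead of the duplicated A,B / B,A lines above.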
reduce2
package mr1;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.Iterator;
/**
 * For each pair of people, concatenates all their common friends.
 * Created by hadoop on 17-2-20.
 */
public class reduce2 extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // key is a pair like "A,B"; values are the friends they share
        StringBuilder friends = new StringBuilder();
        Iterator<Text> iterator = values.iterator();
        while (iterator.hasNext()) {
            friends.append(iterator.next().toString()).append(",");
        }
        // Drop the trailing comma
        friends.setLength(friends.length() - 1);
        context.write(key, new Text(friends.toString()));
    }
}
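Since the data set is tiny, the whole computation can also be checked in plain Java. The following standalone program (a sketch; the class name CommonFriendsCheck is illustrative, and it uses canonical sorted pair keys) performs both steps in memory, which makes it easy to verify the MapReduce output:

import java.util.*;

public class CommonFriendsCheck {
    public static void main(String[] args) {
        String[] input = {
            "A:B,C,D,F,E,O", "B:A,C,E,K", "C:E,A,D,L", "D:A,E,F,L",
            "E:B,C,D,M,L", "F:A,B,C,D,E,O,M", "G:A,C,D,E,F", "H:A,C,D,E,O",
            "I:A,O", "J:B,O", "K:A,C,D", "L:D,E,F", "M:E,F,G", "O:A,H,I,J"
        };
        // Step 1 (mapreduce1): invert to friend -> owners
        Map<String, List<String>> inverted = new TreeMap<>();
        for (String line : input) {
            String[] split = line.split(":");
            for (String friend : split[1].split(","))
                inverted.computeIfAbsent(friend, k -> new ArrayList<>()).add(split[0]);
        }
        // Step 2 (mapreduce2): pair up the owners of each friend,
        // sorted so every pair has one canonical key
        Map<String, List<String>> common = new TreeMap<>();
        for (Map.Entry<String, List<String>> e : inverted.entrySet()) {
            List<String> owners = e.getValue();
            Collections.sort(owners);
            for (int i = 0; i < owners.size() - 1; i++)
                for (int j = i + 1; j < owners.size(); j++)
                    common.computeIfAbsent(owners.get(i) + "," + owners.get(j),
                                           k -> new ArrayList<>()).add(e.getKey());
        }
        for (Map.Entry<String, List<String>> e : common.entrySet())
            System.out.println(e.getKey() + "\t" + String.join(",", e.getValue()));
    }
}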