Finding Common Friends with Hadoop MapReduce

Suppose we have a dataset of every user's friend list: the user appears before the colon and all of that user's friends appear after it (friendships in the data are one-directional). For every two users who have friends in common, we want to find their common friends.

The sample input is as follows:

A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J

Approach

In MapReduce the input arrives as <user, all of that user's friends>. First, use each friend as the key and the user as the value, and let the reducer collect every user who shares that friend. Then pair those users two by two to form new keys, with the previous key (the shared friend) as the value, and let a second reducer merge them. In short, the job splits into two steps (a concrete trace follows the list):
Step 1: a MapReduce pass that finds, for each friend, all users who have that friend in common.
Step 2: a MapReduce pass that pairs those users two by two as the key and outputs the shared friend as the value.
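As a concrete trace, consider friend L. Only D and E list L, so the first pass emits <L, D> and <L, E>, and its reducer outputs the line "L	D,E". The second pass reads that line, sorts the users, and emits <D-E, L>; its reducer then gathers every friend emitted under the key D-E, which here is just L, yielding the final line "D-E:	L".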
package com.sharedfriends;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class Entry {
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		Configuration conf = new Configuration();
		// Job 1: invert the friend lists so that each friend maps to the users who have him/her.
		Job job = Job.getInstance(conf);
		job.setJarByClass(Entry.class);
		job.setMapperClass(SharedFriendsMapper.class);
		job.setReducerClass(SharedFriendsReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/user/hadoop/wordcount/input"));
		FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/user/hadoop/wordcount/output"));
		boolean res1 = job.waitForCompletion(true);
		if (!res1) {
			System.exit(1); // don't start the second job if the first one failed
		}
		// Job 2: pair up the users who share each friend and collect the common friends per pair.
		Job job1 = Job.getInstance(conf);
		job1.setJarByClass(Entry.class);
		job1.setMapperClass(SharedFriendsMapperTwo.class);
		job1.setReducerClass(SharedFriendsReducerTwo.class);
		job1.setOutputKeyClass(Text.class);
		job1.setOutputValueClass(Text.class);
		FileInputFormat.addInputPath(job1, new Path("hdfs://localhost:9000/user/hadoop/wordcount/output"));
		FileOutputFormat.setOutputPath(job1, new Path("hdfs://localhost:9000/user/hadoop/wordcount/output2"));
		boolean res2 = job1.waitForCompletion(true);
		System.exit(res2 ? 0 : 1);
	}
}
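To run the two chained jobs (a minimal sketch, assuming the project is packaged as sharedfriends.jar; the paths are hard-coded above, so no arguments are needed):

hadoop jar sharedfriends.jar com.sharedfriends.Entry

Note that Hadoop will not overwrite an existing output directory, so the output and output2 directories must be deleted first (e.g. with hdfs dfs -rm -r) before re-running.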
package com.sharedfriends;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class SharedFriendsMapper extends Mapper<LongWritable, Text, Text, Text> {
	@Override
	public void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		// Strip all whitespace, then split "user:friend,friend,..." into user and friends.
		String line = value.toString().replaceAll("\\s", "");
		String[] persons = line.split(":");
		String[] friends = persons[1].split(",");
		// Invert the relationship: for each friend, emit <friend, user>.
		for (String friend : friends) {
			context.write(new Text(friend), new Text(persons[0]));
		}
	}
}
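For example, the input line "A:B,C,D,F,E,O" makes this mapper emit <B, A>, <C, A>, <D, A>, <F, A>, <E, A>, and <O, A>: each of A's friends becomes a key, and the value records that A has that friend.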
package com.sharedfriends;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class SharedFriendsReducer extends Reducer<Text, Text, Text, Text> {
	@Override
	public void reduce(Text key, Iterable<Text> values, Context context)
			throws IOException, InterruptedException {
		// Join all users who have this friend into one comma-separated list.
		StringBuilder sb = new StringBuilder();
		for (Text val : values) {
			sb.append(val.toString()).append(',');
		}
		// Drop the trailing comma before writing <friend, user,user,...>.
		String line = sb.deleteCharAt(sb.length() - 1).toString();
		context.write(key, new Text(line));
	}
}
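For the key B, the shuffle delivers all users who list B as a friend, {A, F, J, E}, and this reducer joins them into the line "B	A,F,J,E" (see the first-pass output below).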
package com.sharedfriends;
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class SharedFriendsMapperTwo extends Mapper<LongWritable, Text, Text, Text> {
	@Override
	public void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		// Each input line from the first job looks like "friend<TAB>user,user,...".
		String[] str = value.toString().split("\t");
		String[] persons = str[1].split(",");
		// Sort so that every pair is always emitted in the same order (A-B, never B-A).
		Arrays.sort(persons);
		// Emit <user1-user2, friend> for every pair of users sharing this friend.
		for (int i = 0; i < persons.length - 1; i++) {
			for (int j = i + 1; j < persons.length; j++) {
				context.write(new Text(persons[i] + "-" + persons[j]), new Text(str[0]));
			}
		}
	}
}
package com.sharedfriends;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class SharedFriendsReducerTwo extends Reducer<Text, Text, Text, Text> {
	@Override
	public void reduce(Text key, Iterable<Text> values, Context context)
			throws IOException, InterruptedException {
		// Concatenate all common friends of this user pair.
		StringBuilder sb = new StringBuilder();
		for (Text val : values) {
			sb.append(val.toString()).append(',');
		}
		String line = sb.deleteCharAt(sb.length() - 1).toString();
		context.write(new Text(key.toString() + ':'), new Text(line));
	}
}
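The Arrays.sort call in the second mapper is what keeps each pair unique: without it the same pair could be emitted both as A-B and as B-A, splitting its common friends across two reduce keys. With it, the first-pass line "A	I,K,C,B,G,F,H,O,D" yields pairs such as <B-C, A> and <C-K, A>, and the reducer for each pair key concatenates everything it receives, e.g. key A-B with values {E, C} becomes the line "A-B:	E,C".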

The output is as follows.

Result of the first MapReduce pass:

A	I,K,C,B,G,F,H,O,D
B	A,F,J,E
C	A,E,B,H,F,G,K
D	G,C,K,A,L,F,E,H
E	G,M,L,H,A,F,B,D
F	L,M,D,C,G,A
G	M
H	O
I	O,C
J	O
K	B
L	D,E
M	E,F
O	A,H,I,J,F

Result of the second MapReduce pass:

A-B:	E,C
A-C:	D,F
A-D:	E,F
A-E:	D,B,C
A-F:	O,B,C,D,E
A-G:	F,E,C,D
A-H:	E,C,D,O
A-I:	O
A-J:	O,B
A-K:	D,C
A-L:	F,E,D
A-M:	E,F
B-C:	A
B-D:	A,E
B-E:	C
B-F:	E,A,C
B-G:	C,E,A
B-H:	A,E,C
B-I:	A
B-K:	C,A
B-L:	E
B-M:	E
B-O:	A
C-D:	A,F
C-E:	D
C-F:	D,A
C-G:	D,F,A
C-H:	D,A
C-I:	A
C-K:	A,D
C-L:	D,F
C-M:	F
C-O:	I,A
D-E:	L
D-F:	A,E
D-G:	E,A,F
D-H:	A,E
D-I:	A
D-K:	A
D-L:	E,F
D-M:	F,E
D-O:	A
E-F:	D,M,C,B
E-G:	C,D
E-H:	C,D
E-J:	B
E-K:	C,D
E-L:	D
F-G:	D,C,A,E
F-H:	A,D,O,E,C
F-I:	O,A
F-J:	B,O
F-K:	D,C,A
F-L:	E,D
F-M:	E
F-O:	A
G-H:	D,C,E,A
G-I:	A
G-K:	D,A,C
G-L:	D,F,E
G-M:	E,F
G-O:	A
H-I:	O,A
H-J:	O
H-K:	A,C,D
H-L:	D,E
H-M:	E
H-O:	A
I-J:	O
I-K:	A
I-O:	A
K-L:	D
K-O:	A
L-M:	E,F
