Social Fan Data Analysis: Finding Common QQ Friends

Requirements:

Below is QQ friend-list data. The part before each colon is a user; the part after the colon is all of that user's friends (the friendship relation in this data is one-way).

A:B,C,D,F,E,O

B:A,C,E,K

C:F,A,D,I

D:A,E,F,L

E:B,C,D,M,L

F:A,B,C,D,E,O,M

G:A,C,D,E,F

H:A,C,D,E,O

I:A,O

J:B,O

K:A,C,D

L:D,E,F

M:E,F,G

O:A,H,I,J

Find all pairs of people who have common friends, and list the common friends of each pair.

Stage-one idea: first find, for each person, which users all have that person as a friend.
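
Before looking at the MapReduce version, here is a minimal plain-Java sketch of that inversion (no Hadoop involved; the InvertSketch class and its invert helper are illustrative only): each "person:friends" line is split, one (friend, person) pair is emitted per friend, and the pairs are grouped by friend.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class InvertSketch {

	// For every "person:friends" line, emit (friend, person) and group by friend.
	static Map<String, List<String>> invert(List<String> lines) {
		Map<String, List<String>> friendToPersons = new TreeMap<>();
		for (String line : lines) {
			String[] parts = line.split(":");
			String person = parts[0];
			for (String friend : parts[1].split(",")) {
				friendToPersons.computeIfAbsent(friend, k -> new ArrayList<>()).add(person);
			}
		}
		return friendToPersons;
	}

	public static void main(String[] args) {
		List<String> lines = Arrays.asList("A:B,C,D,F,E,O", "B:A,C,E,K");
		// Prints {A=[B], B=[A], C=[A, B], D=[A], E=[A, B], F=[A], K=[B], O=[A]}
		System.out.println(invert(lines));
	}
}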

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ShareFriendsStepOne {

	static class ShareFriendsMapper extends Mapper<LongWritable, Text, Text, Text> {

		@Override
		protected void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
			// A:B,C,D,F,E,O
			String value = values.toString();
			String[] person_friends = value.split(":");
			String person = person_friends[0];
			String friends = person_friends[1];

			for (String friend : friends.split(",")) {
				// emit (friend, person)
				context.write(new Text(friend), new Text(person));
			}

		}

	}

	static class ShareFriendsReduce extends Reducer<Text, Text, Text, Text> {
		// input: (B,A) (C,A) (D,A) (E,A) (F,A) ...

		@Override
		protected void reduce(Text friend, Iterable<Text> persons, Context context)
				throws IOException, InterruptedException {

			StringBuilder sb = new StringBuilder();

			for (Text person : persons) {
				sb.append(person).append(",");

			}
			// emit (friend, person,person,person,...)
			context.write(friend, new Text(sb.toString()));
		}
	}

	public static void main(String[] args) throws Exception {

		Configuration conf = new Configuration();
		if (args.length != 2) {
			System.err.println("Usage: ShareFriendsStepOne <input dir> <output dir>");
			System.exit(2);
		}
		String inputPath = args[0];  // input directory with the raw friend-list files
		String outputPath = args[1]; // output directory for the results

		Job job = Job.getInstance(conf);
		job.setJarByClass(ShareFriendsStepOne.class);
		
		job.setMapperClass(ShareFriendsMapper.class);
		job.setReducerClass(ShareFriendsReduce.class);
		
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		FileInputFormat.setInputPaths(job, new Path(inputPath));
		FileOutputFormat.setOutputPath(job, new Path(outputPath));

		// submit the job to the YARN cluster and wait for it to finish
		boolean res = job.waitForCompletion(true);
		System.exit(res ? 0 : 1);

	}
}
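
To try stage one out, a typical way is to package the class into a jar and submit it with the standard hadoop jar command; the jar name and HDFS paths below are placeholders:

hadoop jar shared-friends.jar ShareFriendsStepOne /data/friends /data/friends-step1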
Stage-one output (friend, followed by everyone who lists that friend):
A I,K,C,B,G,F,H,O,D,
B A,F,J,E,
C A,E,B,H,F,G,K,
D G,C,K,A,L,F,E,H,
E G,M,L,H,A,F,B,D,
F L,M,D,C,G,A,
G M,
H O,
I O,C,
J O,
K B,
L D,E,
M E,F,
O A,H,I,J,F,

Stage two: iterate over the stage-one output; the key is a pair of people, and the value is one common friend of that pair.
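
The crucial detail is sorting each person list before pairing: that way a given pair always produces the same key ("A-B", never "B-A"), so all of its common friends meet in a single reduce call. Here is a standalone sketch of just that pairing step (the PairSketch class and its pairs helper are illustrative only):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class PairSketch {

	// Emit one "X-Y" key per unordered pair, with X sorted before Y.
	static List<String> pairs(String[] persons) {
		Arrays.sort(persons); // guarantees "A-B", never "B-A"
		List<String> keys = new ArrayList<>();
		for (int i = 0; i < persons.length - 1; i++) {
			for (int j = i + 1; j < persons.length; j++) {
				keys.add(persons[i] + "-" + persons[j]);
			}
		}
		return keys;
	}

	public static void main(String[] args) {
		// The people who all have friend I, in arbitrary input order.
		System.out.println(pairs(new String[] { "O", "C" })); // [C-O]
		System.out.println(pairs(new String[] { "C", "O" })); // [C-O], the same key either way
	}
}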

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SharedFriendsStepTwo {

	static class SharedFriendsStepTwoMapper extends Mapper<LongWritable, Text, Text, Text> {

		// the input here is the output of the previous step, e.g.:
		// A	I,K,C,B,G,F,H,O,D,
		// friend	person,person,person
		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

			String line = value.toString();
			String[] friend_persons = line.split("\t");

			String friend = friend_persons[0];
			String[] persons = friend_persons[1].split(",");

			// sort so a pair always yields the same key regardless of input order
			Arrays.sort(persons);

			for (int i = 0; i < persons.length - 1; i++) {
				for (int j = i + 1; j < persons.length; j++) {
					// emit <person-person, friend> so that all friends shared by the same pair reach the same reducer
					context.write(new Text(persons[i] + "-" + persons[j]), new Text(friend));
				}

			}

		}

	}

	static class SharedFriendsStepTwoReducer extends Reducer<Text, Text, Text, Text> {

		@Override
		protected void reduce(Text person_person, Iterable<Text> friends, Context context)
				throws IOException, InterruptedException {

			StringBuilder sb = new StringBuilder();

			for (Text friend : friends) {
				sb.append(friend).append(" ");

			}
			context.write(person_person, new Text(sb.toString()));
		}

	}

	public static void main(String[] args) throws Exception {

		Configuration conf = new Configuration();
		if (args.length != 2) {
			System.err.println("Usage: SharedFriendsStepTwo <input dir> <output dir>");
			System.exit(2);
		}
		String inputPath = args[0];  // stage-one output directory
		String outputPath = args[1]; // output directory for the results

		Job job = Job.getInstance(conf);
		job.setJarByClass(SharedFriendsStepTwo.class);

		job.setMapperClass(SharedFriendsStepTwoMapper.class);
		job.setReducerClass(SharedFriendsStepTwoReducer.class);

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		FileInputFormat.setInputPaths(job, new Path(inputPath));
		FileOutputFormat.setOutputPath(job, new Path(outputPath));

		// submit the job to the YARN cluster and wait for it to finish
		boolean res = job.waitForCompletion(true);
		System.exit(res ? 0 : 1);

	}

}
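
In practice the two steps can be chained in a single driver that runs them back to back, feeding stage one's output directory into stage two. Below is a minimal sketch, assuming the two step classes above sit in the same package; the intermediate directory is whatever scratch path you choose:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SharedFriendsDriver {

	// Shared configuration for both steps: Text keys and values throughout.
	private static Job buildJob(Configuration conf, Class<? extends Mapper> mapper,
			Class<? extends Reducer> reducer, String in, String out) throws Exception {
		Job job = Job.getInstance(conf);
		job.setJarByClass(SharedFriendsDriver.class);
		job.setMapperClass(mapper);
		job.setReducerClass(reducer);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileInputFormat.setInputPaths(job, new Path(in));
		FileOutputFormat.setOutputPath(job, new Path(out));
		return job;
	}

	public static void main(String[] args) throws Exception {
		// args: <raw friend lists> <intermediate dir> <final output dir>
		Configuration conf = new Configuration();
		Job stepOne = buildJob(conf, ShareFriendsStepOne.ShareFriendsMapper.class,
				ShareFriendsStepOne.ShareFriendsReduce.class, args[0], args[1]);
		if (!stepOne.waitForCompletion(true)) {
			System.exit(1); // stop if stage one fails
		}
		Job stepTwo = buildJob(conf, SharedFriendsStepTwo.SharedFriendsStepTwoMapper.class,
				SharedFriendsStepTwo.SharedFriendsStepTwoReducer.class, args[1], args[2]);
		System.exit(stepTwo.waitForCompletion(true) ? 0 : 1);
	}
}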

Stage-two output (pair, followed by their common friends):

A-B E C 
A-C D F 
A-D E F 
A-E D B C 
A-F O B C D E 
A-G F E C D 
A-H E C D O 
A-I O 
A-J O B 
A-K D C 
A-L F E D 
A-M E F 
B-C A 
B-D A E 
B-E C 
B-F E A C 
B-G C E A 
B-H A E C 
B-I A 
B-K C A 
B-L E 
B-M E 
B-O A 
C-D A F 
C-E D 
C-F D A 
C-G D F A 
C-H D A 
C-I A 
C-K A D 
C-L D F 
C-M F 
C-O I A 
D-E L 
D-F A E 
D-G E A F 
D-H A E 
D-I A 
D-K A 
D-L E F 
D-M F E 
D-O A 
E-F D M C B 
E-G C D 
E-H C D 
E-J B 
E-K C D 
E-L D 
F-G D C A E 
F-H A D O E C 
F-I O A 
F-J B O 
F-K D C A 
F-L E D 
F-M E 
F-O A 
G-H D C E A 
G-I A 
G-K D A C 
G-L D F E 
G-M E F 
G-O A 
H-I O A 
H-J O 
H-K A C D 
H-L D E 
H-M E 
H-O A 
I-J O 
I-K A 
I-O A 
K-L D 
K-O A 
L-M E F 

