查找共同好友

一、概述

查找共同好友如:

  • A有好友B,C,D,E,O
  • B有好友A,C,E,K
  • C有好友F,A,D,I
  • ......
A:B,C,D,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J

思路:

1、先找出每个人分别出现在哪些人的好友列表中,如(第1个MR):

  • B  ->  A,E,F,J
  • C  ->  A,B,E,F,G,K

2、将1中的结果排序后两两组合,如(第2个MR中的Map):

  • A-E,A-F,A-J,E-F,E-J,F-J同时拥有好友B
  • A-B,A-E,A-F,A-G,A-K,B-E,B-F,B-G,B-K,E-F,E-G,E-K,F-G,F-K,G-K同时拥有好友C

3、将2中的结果按组合分组,如(第2个MR中的Reduce):

  • A-F同时拥有好友B,C,……

二、代码

package com.cfl.hadoop.demo.mr;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;

/**
 * Finds the common friends of every pair of users with two chained MapReduce jobs.
 *
 * <p>Each input line has the form {@code person:friend1,friend2,...}.
 * <ol>
 *   <li>Job 1 inverts the relation: for every friend it collects the people whose
 *       friend list contains that friend, and writes {@code friend:person1,person2,...}.</li>
 *   <li>Job 2 sorts each of those person lists, emits every pair {@code p1-p2} keyed to
 *       the shared friend, and groups by pair, producing {@code p1-p2 <TAB> friend1,friend2,...}.</li>
 * </ol>
 *
 * @author chenfenli
 */
public class CommonFriendsMapReduce {

	/** Default input file, used when no command-line argument is supplied. */
	private static final String DEFAULT_INPUT_PATH =
			"/Users/chenfenli/Documents/work_haozhun/Hadoop/src/main/java/com/cfl/hadoop/files/CommonFriends";

	/** Default output directory of job 1 (and input of job 2). */
	private static final String DEFAULT_TEMP_PATH =
			"/Users/chenfenli/Documents/work_haozhun/Hadoop/src/main/java/com/cfl/hadoop/files/temp";

	/** Default output directory of job 2. */
	private static final String DEFAULT_OUTPUT_PATH =
			"/Users/chenfenli/Documents/work_haozhun/Hadoop/src/main/java/com/cfl/hadoop/files/temp2";

	/**
	 * Job 1 mapper: turns {@code person:f1,f2,...} into one {@code (friend, person)}
	 * record per listed friend.
	 *
	 * @author chenfenli
	 */
	public static class CommonFriendsMapper extends Mapper<Object, Text, Text, Text> {
		private final Text k = new Text();
		private final Text v = new Text();

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			String[] parts = value.toString().trim().split(":", 2);
			if (parts.length < 2) {
				// Blank line or line without a ':' separator: nothing to emit.
				return;
			}
			String person = parts[0].trim();
			if (person.isEmpty()) {
				return;
			}
			v.set(person);
			for (String token : parts[1].split(",")) {
				String friend = token.trim();
				if (friend.isEmpty()) {
					continue;
				}
				k.set(friend);
				context.write(k, v);
			}
		}
	}

	/**
	 * Job 1 reducer: writes {@code friend:person1,person2,...} as the output key with an
	 * empty value, so each output line can be parsed by the job 2 mapper.
	 */
	public static class CommonFriendsReduce extends Reducer<Text, Text, Text, Text> {
		private final Text k = new Text();
		// Intentionally left empty; the whole record is carried in the key.
		private final Text v = new Text();

		@Override
		protected void reduce(Text key, Iterable<Text> value, Context context)
				throws IOException, InterruptedException {
			k.set(key.toString() + ":" + joinWithCommas(value));
			context.write(k, v);
		}
	}

	/**
	 * Job 2 mapper: for a line {@code friend:p1,p2,...} emits {@code (pi-pj, friend)} for
	 * every pair of people, with the two names of a pair in sorted order so that the same
	 * pair always produces the same key.
	 *
	 * @author chenfenli
	 */
	public static class CommonFriendsTwoMapper extends Mapper<Object, Text, Text, Text> {
		private final Text k = new Text();
		private final Text v = new Text();

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			// trim() drops the trailing tab that job 1 writes after its (empty) value.
			String[] parts = value.toString().trim().split(":", 2);
			if (parts.length < 2) {
				return;
			}
			String[] people = parts[1].split(",");
			// Trim before sorting so stray whitespace cannot influence the order.
			for (int i = 0; i < people.length; i++) {
				people[i] = people[i].trim();
			}
			Arrays.sort(people);

			v.set(parts[0].trim());
			for (int i = 0; i < people.length - 1; i++) {
				if (people[i].isEmpty()) {
					continue;
				}
				for (int j = i + 1; j < people.length; j++) {
					k.set(people[i] + "-" + people[j]);
					context.write(k, v);
				}
			}
		}
	}

	/**
	 * Job 2 reducer: joins all friends shared by one pair into a comma-separated list.
	 */
	public static class CommonFriendsTwoReduce extends Reducer<Text, Text, Text, Text> {
		private final Text v = new Text();

		@Override
		protected void reduce(Text key, Iterable<Text> value, Context context)
				throws IOException, InterruptedException {
			v.set(joinWithCommas(value));
			context.write(key, v);
		}
	}

	/**
	 * Runs job 1 and, if it succeeds, job 2, then prints the exit flag of job 2.
	 *
	 * @param args optional overrides: {@code args[0]} input path, {@code args[1]}
	 *             intermediate directory, {@code args[2]} final output directory; any
	 *             missing argument falls back to the built-in default path
	 * @throws Exception if a job cannot be configured or submitted
	 */
	public static void main(String[] args) throws Exception {
		String inputPath = argOrDefault(args, 0, DEFAULT_INPUT_PATH);
		String outputPath = argOrDefault(args, 1, DEFAULT_TEMP_PATH);
		String twoOutputPath = argOrDefault(args, 2, DEFAULT_OUTPUT_PATH);

		int flag = oneJob(inputPath, outputPath);
		if (flag != 0) {
			System.err.println("Job 1 failed (flag " + flag + "); job 2 was not started.");
			return;
		}

		int twoflag = twoJob(outputPath, twoOutputPath);
		System.out.println(twoflag);
	}

	/**
	 * Configures and runs job 2 (pair generation and grouping).
	 *
	 * @param inputPath  output directory of job 1
	 * @param outputPath directory for the final result; deleted first if it exists
	 * @return 0 if the job succeeded, 1 otherwise
	 * @throws Exception if the job cannot be configured or submitted
	 */
	public static int twoJob(String inputPath, String outputPath) throws Exception {
		Job job = newTextJob();
		job.setMapperClass(CommonFriendsTwoMapper.class);
		job.setReducerClass(CommonFriendsTwoReduce.class);
		return runJob(job, inputPath, outputPath);
	}

	/**
	 * Configures and runs job 1 (inversion of the friend relation).
	 *
	 * @param inputPath  file or directory with {@code person:friend,...} lines
	 * @param outputPath directory for the intermediate result; deleted first if it exists
	 * @return 0 if the job succeeded, 1 otherwise
	 * @throws Exception if the job cannot be configured or submitted
	 */
	public static int oneJob(String inputPath, String outputPath) throws Exception {
		Job job = newTextJob();
		job.setMapperClass(CommonFriendsMapper.class);
		job.setReducerClass(CommonFriendsReduce.class);
		return runJob(job, inputPath, outputPath);
	}

	/**
	 * Recursively deletes a local file or directory. Does nothing when {@code file} is
	 * {@code null} or does not exist.
	 *
	 * <p>The jobs in this class clear their output directory through the Hadoop
	 * {@code FileSystem} instead, so that cleanup also works on non-local filesystems;
	 * this method is kept as a public utility for existing callers.
	 *
	 * @param file the file or directory to remove
	 */
	public static void deleteFile(File file) {
		if (file == null || !file.exists()) {
			return;
		}
		if (file.isDirectory()) {
			// listFiles() returns null on an I/O error or missing permission.
			File[] children = file.listFiles();
			if (children != null) {
				for (File child : children) {
					deleteFile(child);
				}
			}
		}
		file.delete();
	}

	/** Creates a job whose map and reduce outputs are all {@code Text}. */
	private static Job newTextJob() throws IOException {
		Job job = Job.getInstance(new Configuration());
		job.setJarByClass(CommonFriendsMapReduce.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		return job;
	}

	/** Clears the output directory, wires the paths and runs the job to completion. */
	private static int runJob(Job job, String inputPath, String outputPath) throws Exception {
		Path output = new Path(outputPath);
		// Hadoop refuses to start when the output directory already exists. Delete it via
		// the filesystem the path belongs to, so this works on HDFS as well as locally.
		output.getFileSystem(job.getConfiguration()).delete(output, true);

		FileInputFormat.addInputPath(job, new Path(inputPath));
		FileOutputFormat.setOutputPath(job, output);
		return job.waitForCompletion(true) ? 0 : 1;
	}

	/** Joins the string form of all values with commas, in iteration order. */
	private static String joinWithCommas(Iterable<Text> values) {
		StringBuilder joined = new StringBuilder();
		boolean first = true;
		for (Text item : values) {
			if (!first) {
				joined.append(',');
			}
			joined.append(item.toString());
			first = false;
		}
		return joined.toString();
	}

	/** Returns {@code args[index]} when present, otherwise {@code fallback}. */
	private static String argOrDefault(String[] args, int index, String fallback) {
		return (args != null && args.length > index) ? args[index] : fallback;
	}
}
  • 第1个MR结果:
A:K,F,I,C,D,G,B,O	
B:J,F,E,A	
C:A,G,B,E,K,F	
D:F,L,K,G,E,C,A	
E:F,L,D,M,G,B,A	
F:M,L,D,C	
G:M	
H:O	
I:O,C	
J:O	
K:B	
L:D,E	
M:F,E	
O:A,G,I,J,F	
  • 第2个MR结果:
A-B	E,C
A-C	D
A-D	E
A-E	B,C,D
A-F	E,B,O,D,C
A-G	E,D,O,C
A-I	O
A-J	O,B
A-K	C,D
A-L	E,D
A-M	E
B-C	A
B-D	A,E
B-E	C
B-F	E,C,A
B-G	C,E,A
B-I	A
B-K	A,C
B-L	E
B-M	E
B-O	A
C-D	A,F
C-E	D
C-F	A,D
C-G	A,D
C-I	A
C-K	D,A
C-L	F,D
C-M	F
C-O	A,I
D-E	L
D-F	A,E
D-G	E,A
D-I	A
D-K	A
D-L	F,E
D-M	E,F
D-O	A
E-F	B,C,D,M
E-G	C,D
E-J	B
E-K	C,D
E-L	D
F-G	A,C,E,O,D
F-I	O,A
F-J	O,B
F-K	D,C,A
F-L	E,D
F-M	E
F-O	A
G-I	O,A
G-J	O
G-K	A,C,D
G-L	D,E
G-M	E
G-O	A
I-J	O
I-K	A
I-O	A
K-L	D
K-O	A
L-M	E,F

 

你可能感兴趣的:(Hadoop)