MapReduce实现寻找共同好友

  1. 需求:给出A-O个人中每个人的好友列表,求出哪些人两两之间有共同好友,以及他们的共同好友都有谁。
       注意:这些人的好友关系都是单向的,可能A是B的好友,但是B不一定是A的好友,这类似于微博的关注,
                  A关注B,但是B不一定关注了A。
  2. 原始文件如下:
      MapReduce实现寻找共同好友_第1张图片

     

  3. 要求输出的格式如下:
     MapReduce实现寻找共同好友_第2张图片
  4. 思路分析:
        ⑴我们从上面可以知道A-O每个人分别拥有哪些好友,但是我们现在要找出的是两两之间有哪些共同好友。那么
           我们可以逆向思维,第一步找出哪些好友拥有A,哪些好友拥有B.....依次找出,结果如下:
           MapReduce实现寻找共同好友_第3张图片
        ⑵得出上面的数据后,我们可以对后面的好友进行排序,避免重复,将“拥有这名朋友的所有人”进行两两配对,并将配对后的字符串当做键,“朋友”当做值输出,即输出<人-人,共同朋友>
           MapReduce实现寻找共同好友_第4张图片 
  5. 代码实现,通过两次job运算
     a:FriendMapper01
           
    package com.kgf.mapreduce.friend;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    public class FriendMapper01 extends Mapper{
    
    	Text k  =new Text();
    	Text v  =new Text();
    	
    	@Override
    	protected void map(LongWritable key, Text value,Context context)
    			throws IOException, InterruptedException {
    		//1:获取一行数据
    		String line = value.toString();
    		//2:对一行数据进行切割
    		String[] fields = line.split(":");
    		String person = fields[0];
    		String[] friends = fields[1].split(",");
    		for (String friend : friends) {
    			k.set(friend);
    			v.set(person);
    			context.write(k, v);
    		}
    	}
    }
    

    b:FriendReducer
         

    package com.kgf.mapreduce.friend;
    
    import java.io.IOException;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    public class FriendReducer extends Reducer {
    
    	@Override
    	protected void reduce(Text key, Iterable values,Context context)
    			throws IOException, InterruptedException {
    		
    		StringBuffer sb = new StringBuffer();
    		//1:获取哪些好友都有对应的人
    		for (Text text : values) {
    			sb.append(text.toString()+",");
    		}
    		sb.deleteCharAt(sb.length()-1);
    		context.write(key, new Text(sb.toString()));
    	}
    }
    

    c:FriendDriver01
         

    package com.kgf.mapreduce.friend;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class FriendDriver01 {
    
    	public static void main(String[] args) throws Exception {
    		//1:获取Job对象
    		Configuration conf = new Configuration();
    		Job job = Job.getInstance(conf);
    		
    		//2:设置jar
    		job.setJarByClass(FriendDriver01.class);
    		
    		//3:关联Mapper和reducer
    		job.setMapperClass(FriendMapper01.class);
    		job.setReducerClass(FriendReducer.class);
    		
    		//4:设置mapper输出参数
    		job.setMapOutputKeyClass(Text.class);
    		job.setMapOutputValueClass(Text.class);
    		
    		//5:设置最终输出
    		job.setOutputKeyClass(Text.class);
    		job.setOutputValueClass(Text.class);
    		
    		//6:设置文件输入输出路径
    		FileInputFormat.setInputPaths(job, new Path(args[0]));
    		FileOutputFormat.setOutputPath(job, new Path(args[1]));
    		
    		//7:提交
    		boolean result = job.waitForCompletion(true);
    		System.exit(result?0:1);
    	}
    	
    }
    

    d:FriengMapper02

    package com.kgf.mapreduce.friend;
    
    import java.io.IOException;
    import java.util.Arrays;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    public class FriengMapper02 extends Mapper{
    
    	@Override
    	protected void map(LongWritable key, Text value,Context context)
    			throws IOException, InterruptedException {
    		//1:获取一行
    		String line = value.toString();
    		//2:切割数据
    		String[] fileds = line.split("\t");
    		String friend = fileds[0];
    		String[] persons = fileds[1].split(",");
    		Arrays.sort(persons);//排序
    		for (int i = 0; i < persons.length; i++) {
    			for (int j = i+1; j < persons.length; j++) {
    				context.write(new Text(persons[i]+"-"+persons[j]),new Text(friend));
    			}
    		}
    	}
    	
    }
    

    e:FriendReducer2

    package com.kgf.mapreduce.friend;
    
    import java.io.IOException;
    import java.util.HashSet;
    
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    public class FriendReducer2 extends Reducer {
    
    	@Override
    	protected void reduce(Text key, Iterable values,Context context)
    			throws IOException, InterruptedException {
    		
    		StringBuffer sb = new StringBuffer();
    		HashSet set = new HashSet();
    		
    		for (Text value : values) {
    			String v = value.toString();
    			if(!set.contains(v)) {
    				set.add(v);
    				sb.append(v).append(",");
    			}
    		}
    		sb.deleteCharAt(sb.length()-1);
    		context.write(key, new Text(sb.toString()));
    	}
    }
    

    f:FriendDriver2

    package com.kgf.mapreduce.friend;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class FriendDriver2 {
    
    	public static void main(String[] args) throws Exception {
    		//1:获取Job对象
    		Configuration conf = new Configuration();
    		Job job = Job.getInstance(conf);
    		
    		//2:设置jar
    		job.setJarByClass(FriendDriver2.class);
    		
    		//3:关联Mapper和reducer
    		job.setMapperClass(FriengMapper02.class);
    		job.setReducerClass(FriendReducer2.class);
    		
    		//4:设置mapper输出参数
    		job.setMapOutputKeyClass(Text.class);
    		job.setMapOutputValueClass(Text.class);
    		
    		//5:设置最终输出
    		job.setOutputKeyClass(Text.class);
    		job.setOutputValueClass(Text.class);
    		
    		//6:设置文件输入输出路径
    		FileInputFormat.setInputPaths(job, new Path(args[0]));
    		FileOutputFormat.setOutputPath(job, new Path(args[1]));
    		
    		//7:提交
    		boolean result = job.waitForCompletion(true);
    		System.exit(result?0:1);
    	}
    	
    }
    

     

你可能感兴趣的:(mapreduce)