Finding common QQ friends with a Hadoop MapReduce algorithm

Background
A:B,C,D,E,F means user A has friends B, C, D, E, and F.
B:C,D,H,Y
From the two lines above, the common friends of A and B are C and D.
Approach:
1: First, invert the relationship: for each person, find everyone who has that person on their friend list.
Test data:
[Figure 1: sample input data]
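For reference (the screenshot is not reproduced here), a minimal input file covering just the two records from the background would look like this; the actual test file presumably contains more users:

A:B,C,D,E,F
B:C,D,H,Y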
2: The stage-1 MapReduce program:

package sharefriends;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;

public class ShareFriendsOne {

    static class ShareFriendsMapper extends Mapper<LongWritable, Text, Text, Text> {

        Text key = new Text();
        Text out = new Text();
        @Override
        protected void map(LongWritable offset, Text value, Context context) throws IOException, InterruptedException {
            // input line format: A:B,C,D,E,F
            String line = value.toString();
            String[] person_friends = line.split(":");
            String friends = person_friends[1];
            String person = person_friends[0];
            // emit (friend, person): the reducer then collects, per friend,
            // everyone who has that friend on their list
            for (String friend : friends.split(",")) {
                key.set(friend);
                out.set(person);
                context.write(key,out);
            }
        }
    }
    static class ShareFriendsReducer extends Reducer<Text, Text, Text, Text> {

        Text out = new Text();

        @Override
        protected void reduce(Text friend, Iterable<Text> persons, Context context) throws IOException, InterruptedException {
            // concatenate everyone who has this person as a friend
            StringBuilder sb = new StringBuilder();
            for (Text person:persons){
                sb.append(person).append(",");
            }
            out.set(sb.toString());
            context.write(friend,out);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();


        // local debug mode
        conf.set("mapreduce.framework.name","local");
        conf.set("fs.defaultFS","file:///");

//        local submit mode with HDFS online
//        conf.set("mapreduce.framework.name","local");
//        conf.set("fs.defaultFS","hdfs://master:9000");



        Job job = Job.getInstance(conf); // pass conf so the settings above take effect

        // cluster mode: locate the jar by class
        job.setJarByClass(ShareFriendsOne.class);
        // debug mode: point at a prebuilt jar
//        job.setJar("/home/willian/Desktop/project/java/hadoop/out/jar/word.jar");

        // set the mapper and reducer classes
        job.setMapperClass(ShareFriendsMapper.class);
        job.setReducerClass(ShareFriendsReducer.class);

        // map output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // final output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
//        job.setNumReduceTasks(0);

//        job.addArchiveToClassPath(); // cache a jar archive on the task nodes' classpath
//        job.addFileToClassPath(); // cache a regular file on the task nodes' classpath
//        job.addCacheFile(); // cache a regular file in the task nodes' working directory


        // input directory of the job's raw input files
//        FileInputFormat.setInputPaths(job,new Path("/wordcount/input"));
        FileInputFormat.setInputPaths(job, new Path("/home/willian/Desktop/project/java/hadoop/mrlocal/friends.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/home/willian/Desktop/project/java/hadoop/mrlocal/out"));

        boolean res = job.waitForCompletion(true);

        System.exit(res ? 0 : 1);
    }

}

3: The stage-1 output looks like this:
[Figure 2: stage-1 output data]
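On the two-record sample above, stage 1 would produce the following (the ordering of names within a line is not guaranteed by MapReduce; the trailing comma comes from the reducer's StringBuilder):

B	A,
C	A,B,
D	A,B,
E	A,
F	A,
H	B,
Y	B,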

4: Stage 2 walks the stage-1 output; the key is a pair of people and the value is one friend they have in common. The people are sorted before pairing so that each pair is always emitted in one canonical order (A+B, never B+A) and therefore groups onto a single reduce key, as traced below.
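As a concrete trace on the sample data, the stage-1 lines for C and D each turn into one pair record:

C	A,B   ->  sort persons to [A, B]  ->  emit ("A+B", "C")
D	A,B   ->  sort persons to [A, B]  ->  emit ("A+B", "D")

The reduce step then groups by pair, so A+B collects C and D, matching the expected answer from the background.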

package sharefriends;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.Arrays;

public class ShareFriendsTow {

    static class ShareFriendsTowMapper extends Mapper<LongWritable, Text, Text, Text> {

        Text key = new Text();
        Text out = new Text();
        // input line (stage-1 output), e.g.: A	F,H,D,G,B,K,C,I,O,
        @Override
        protected void map(LongWritable offset, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            String[] friend_persons = line.split("\t");
            String friend = friend_persons[0];
            String[] persons = friend_persons[1].split(",");
            // sort so each pair is emitted in one canonical order (A+B, never B+A)
            Arrays.sort(persons);
            for (int i = 0; i < persons.length - 1; i++) {
                for (int j = i + 1; j < persons.length; j++) {
                    key.set(persons[i] + "+" + persons[j]);
                    out.set(friend);
                    context.write(key, out);
                }
            }
        }
    }

    static class ShareFriendsTowReducer extends Reducer<Text, Text, Text, Text> {

        Text out = new Text();

        @Override
        protected void reduce(Text pair, Iterable<Text> friends, Context context) throws IOException, InterruptedException {
            // concatenate every common friend of this pair
            StringBuilder sb = new StringBuilder();
            for (Text friend : friends) {
                sb.append(friend).append(",");
            }
            out.set(sb.toString());
            context.write(pair, out);
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();


        // local debug mode
        conf.set("mapreduce.framework.name","local");
        conf.set("fs.defaultFS","file:///");

//        local submit mode with HDFS online
//        conf.set("mapreduce.framework.name","local");
//        conf.set("fs.defaultFS","hdfs://master:9000");



        Job job = Job.getInstance(conf); // pass conf so the settings above take effect

        // cluster mode: locate the jar by class
        job.setJarByClass(ShareFriendsTow.class);
        // debug mode: point at a prebuilt jar
//        job.setJar("/home/willian/Desktop/project/java/hadoop/out/jar/word.jar");

        // set the mapper and reducer classes
        job.setMapperClass(ShareFriendsTowMapper.class);
        job.setReducerClass(ShareFriendsTowReducer.class);

        // map output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // final output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
//        job.setNumReduceTasks(0);

//        job.addArchiveToClassPath(); // cache a jar archive on the task nodes' classpath
//        job.addFileToClassPath(); // cache a regular file on the task nodes' classpath
//        job.addCacheFile(); // cache a regular file in the task nodes' working directory


        // job input: the stage-1 output (saved locally here as friendout.txt)
//        FileInputFormat.setInputPaths(job,new Path("/wordcount/input"));
        FileInputFormat.setInputPaths(job, new Path("/home/willian/Desktop/project/java/hadoop/mrlocal/friendout.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/home/willian/Desktop/project/java/hadoop/mrlocal/out"));

        boolean res = job.waitForCompletion(true);

        System.exit(res ? 0 : 1);
    }
}
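
For completeness, here is a minimal chaining sketch: a hypothetical ShareFriendsDriver (the class name and the stage1out intermediate directory are assumptions, not part of the original post) that runs stage 2 directly on stage 1's output directory instead of a hand-renamed friendout.txt. It reuses the two classes above, so it must live in the same sharefriends package:

package sharefriends;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ShareFriendsDriver {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("mapreduce.framework.name", "local");
        conf.set("fs.defaultFS", "file:///");

        // stage 1: invert person->friends into friend->persons
        Job one = Job.getInstance(conf);
        one.setJarByClass(ShareFriendsOne.class);
        one.setMapperClass(ShareFriendsOne.ShareFriendsMapper.class);
        one.setReducerClass(ShareFriendsOne.ShareFriendsReducer.class);
        one.setMapOutputKeyClass(Text.class);
        one.setMapOutputValueClass(Text.class);
        one.setOutputKeyClass(Text.class);
        one.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(one, new Path("/home/willian/Desktop/project/java/hadoop/mrlocal/friends.txt"));
        Path mid = new Path("/home/willian/Desktop/project/java/hadoop/mrlocal/stage1out"); // hypothetical intermediate dir
        FileOutputFormat.setOutputPath(one, mid);
        if (!one.waitForCompletion(true)) System.exit(1);

        // stage 2: pair up the persons sharing each friend
        Job two = Job.getInstance(conf);
        two.setJarByClass(ShareFriendsTow.class);
        two.setMapperClass(ShareFriendsTow.ShareFriendsTowMapper.class);
        two.setReducerClass(ShareFriendsTow.ShareFriendsTowReducer.class);
        two.setMapOutputKeyClass(Text.class);
        two.setMapOutputValueClass(Text.class);
        two.setOutputKeyClass(Text.class);
        two.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(two, mid); // stage-1 output feeds stage 2
        FileOutputFormat.setOutputPath(two, new Path("/home/willian/Desktop/project/java/hadoop/mrlocal/out"));
        System.exit(two.waitForCompletion(true) ? 0 : 1);
    }
}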

The final result:
[Figure 3: final output, common friends per pair]
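On the two-record sample, stage 2 would produce a single line (value order within the line is not guaranteed; the trailing comma again comes from the reducer's StringBuilder):

A+B	C,D,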

This yields all common friends for every pair of people.
