2019独角兽企业重金招聘Python工程师标准>>>
基于腾讯大数据QQ共同好友推荐系统的思路,我们用MapReduce来实现一下。
测试数据:前面代表QQ用户,:后面代表用户QQ好友
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
可以分两个步骤来实现
第一步map和reduce阶段
1.先根据用户QQ好友作为key,用户作为value来输出:
比如第一行:B->A C->A D->A F->A E->A O->A
第二行:A->B C->B E->B K->B
然后根据相同的key使用reduce来聚合
比如输出:B->A B->C B->D....
reduce会根据相同的key分发给相应的reduce task来执行
结果输出为:B->A C D...
再写代码组合输出:
A-C B
A-D B
C-D B
.....
代码实现
package com.xuyu.friends;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
public class CommonFriendsOne {
/**
 * Step-one mapper. Input line format: "user:friend1,friend2,...",
 * e.g. "A:B,C,D,F,E,O". Emits one (friend, user) pair per friend,
 * i.e. B->A, C->A, D->A ... so the reducer groups, per friend, all
 * users who have that friend.
 */
public static class CommonFriendsMapper extends Mapper<LongWritable, Text, Text, Text> {
    // Reused Writable instances to avoid one allocation per output record.
    private final Text k = new Text();
    private final Text v = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] userAndFriends = value.toString().split(":");
        // Skip blank or malformed lines instead of failing the whole task.
        if (userAndFriends.length < 2) {
            return;
        }
        String user = userAndFriends[0];
        String[] friends = userAndFriends[1].split(",");
        v.set(user);
        for (String f : friends) {
            k.set(f);
            // key = friend, value = the user who lists this friend
            context.write(k, v);
        }
    }
}
public static class CommonFriendsOneReduce extends Reducer{
//一组数据:B->A E F J...
//一组数据:C->B F E J...
//所以需要排序
@Override
protected void reduce(Text friend, Iterable users, Context context) throws IOException, InterruptedException {
ArrayList userList=new ArrayList();
for (Text user:users){
userList.add(user.toString());
}
//排序
Collections.sort(userList);
//组合输出
for(int i=0;i
结果输出
part-r-00000文件
B-C A
B-D A
B-F A
B-G A
B-H A
B-I A
B-K A
B-O A
C-D A
C-F A
C-G A
C-H A
C-I A
C-K A
C-O A
D-F A
D-G A
D-H A
D-I A
D-K A
D-O A
F-G A
F-H A
F-I A
F-K A
F-O A
G-H A
G-I A
G-K A
G-O A
H-I A
H-K A
H-O A
I-K A
I-O A
K-O A
A-E B
A-F B
A-J B
E-F B
E-J B
F-J B
A-B C
A-E C
A-F C
A-G C
A-H C
A-K C
B-E C
B-F C
B-G C
B-H C
B-K C
E-F C
E-G C
E-H C
E-K C
F-G C
F-H C
F-K C
G-H C
G-K C
H-K C
A-C D
A-E D
A-F D
A-G D
A-H D
A-K D
A-L D
C-E D
C-F D
C-G D
C-H D
C-K D
C-L D
E-F D
E-G D
E-H D
E-K D
E-L D
F-G D
F-H D
F-K D
F-L D
G-H D
G-K D
G-L D
H-K D
H-L D
K-L D
A-B E
A-D E
A-F E
A-G E
A-H E
A-L E
A-M E
B-D E
B-F E
B-G E
B-H E
B-L E
B-M E
D-F E
D-G E
D-H E
D-L E
D-M E
F-G E
F-H E
F-L E
F-M E
G-H E
G-L E
G-M E
H-L E
H-M E
L-M E
A-C F
A-D F
A-G F
A-L F
A-M F
C-D F
C-G F
C-L F
C-M F
D-G F
D-L F
D-M F
G-L F
G-M F
L-M F
C-O I
D-E L
E-F M
A-F O
A-H O
A-I O
A-J O
F-H O
F-I O
F-J O
H-I O
H-J O
I-J O
第二步:将第一步输出的结果文件再进行一次map reduce
输出结果应该为这种:第一行表示:A和B用户都有C和E这两个共同好友
A-B [C, E]
A-C [D, F]
A-D [E, F]
A-E [B, C, D]
A-F [B, C, D, E, O]
A-G [C, D, E, F]
代码实现
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
/**
 * Step two: reads step-one output lines of the form "u1-u2\t<friend>",
 * groups by the user pair, and writes each pair with the sorted list of
 * all their common friends, e.g. "A-B\t[C, E]".
 */
public class CommonFriendsOne2 {

    /** Re-emits (user-pair, friend) so the reducer can collect all common friends per pair. */
    public static class CommonFriendsMapper extends Mapper<LongWritable, Text, Text, Text> {
        // Reused Writable instances to avoid one allocation per input record.
        private final Text pair = new Text();
        private final Text friend = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] friendsAndUser = value.toString().split("\t");
            // Skip blank or malformed lines instead of throwing ArrayIndexOutOfBounds.
            if (friendsAndUser.length < 2) {
                return;
            }
            pair.set(friendsAndUser[0]);
            friend.set(friendsAndUser[1]);
            context.write(pair, friend);
        }
    }

    /** Sorts each pair's common friends and writes them as one bracketed list. */
    public static class CommonFriendsOneReduce extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text friends, Iterable<Text> users, Context context) throws IOException, InterruptedException {
            ArrayList<String> userList = new ArrayList<>();
            for (Text user : users) {
                userList.add(user.toString());
            }
            // Sort for deterministic, readable output, e.g. [B, C, D].
            Collections.sort(userList);
            context.write(friends, new Text(userList.toString()));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Locate the jar containing this driver class at submit time.
        job.setJarByClass(CommonFriendsOne2.class);
        job.setMapperClass(CommonFriendsMapper.class);
        job.setReducerClass(CommonFriendsOneReduce.class);
        // Mapper output key/value types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // Reducer (final) output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Input is the output directory of step one.
        FileInputFormat.setInputPaths(job, new Path("F:\\mrdata\\friends\\output"));
        FileOutputFormat.setOutputPath(job, new Path("F:\\mrdata\\friends\\output2"));
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : -1);
    }
}
统计输出结果为
A-B [C, E]
A-C [D, F]
A-D [E, F]
A-E [B, C, D]
A-F [B, C, D, E, O]
A-G [C, D, E, F]
A-H [C, D, E, O]
A-I [O]
A-J [B, O]
A-K [C, D]
A-L [D, E, F]
A-M [E, F]
B-C [A]
B-D [A, E]
B-E [C]
B-F [A, C, E]
B-G [A, C, E]
B-H [A, C, E]
B-I [A]
B-K [A, C]
B-L [E]
B-M [E]
B-O [A]
C-D [A, F]
C-E [D]
C-F [A, D]
C-G [A, D, F]
C-H [A, D]
C-I [A]
C-K [A, D]
C-L [D, F]
C-M [F]
C-O [A, I]
D-E [L]
D-F [A, E]
D-G [A, E, F]
D-H [A, E]
D-I [A]
D-K [A]
D-L [E, F]
D-M [E, F]
D-O [A]
E-F [B, C, D, M]
E-G [C, D]
E-H [C, D]
E-J [B]
E-K [C, D]
E-L [D]
F-G [A, C, D, E]
F-H [A, C, D, E, O]
F-I [A, O]
F-J [B, O]
F-K [A, C, D]
F-L [D, E]
F-M [E]
F-O [A]
G-H [A, C, D, E]
G-I [A]
G-K [A, C, D]
G-L [D, E, F]
G-M [E, F]
G-O [A]
H-I [A, O]
H-J [O]
H-K [A, C, D]
H-L [D, E]
H-M [E]
H-O [A]
I-J [O]
I-K [A]
I-O [A]
K-L [D]
K-O [A]
L-M [E, F]
版权@须臾之余https://my.oschina.net/u/3995125