spark-core求共同好友
数据样式
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
代码
- 第一遍聚合:以好友为 key,拿到拥有该好友的所有用户。
- 第二遍聚合:以两个用户组成的用户对为 key,聚合得到这对用户的共同好友。
package com.caiw.sparkcore
import org.apache.spark.sql.SparkSession
import scala.collection.immutable.HashMap
import scala.collection.mutable
object ShareFriend {

  /** Computes the common friends of every pair of users.
    *
    * Input lines look like "A:B,C,D" meaning user A's friends are B, C, D.
    * Pass 1: invert the relation — for each friend, collect every user that
    * lists them. Pass 2: for each friend, emit every pair of those users
    * keyed canonically, then reduce to join each pair's common friends.
    *
    * @param args unused command-line arguments
    */
  def main(args: Array[String]): Unit = {
    val ss = SparkSession.builder().appName("shareFriend").master("local[1]").getOrCreate()
    try {
      ss.sparkContext
        .textFile("spark/src/main/resources/friend.txt")
        // "A:B,C,D" -> (B, A), (C, A), (D, A): one (friend, user) pair per friend
        .flatMap { line =>
          val parts = line.split(":")
          val user = parts(0)
          parts(1).split(",").map(friend => (friend, user))
        }
        // friend -> all users that list this friend
        .groupByKey()
        // For each friend, emit ("U1+U2", friend) for every pair of users.
        // Sort the users first so each pair is always keyed canonically
        // ("A+B", never "B+A") regardless of partition/aggregation order;
        // without sorting, a pair's common friends get split across two keys.
        .flatMap { case (friend, users) =>
          val sorted = users.toArray.sorted
          for {
            i <- sorted.indices
            j <- (i + 1) until sorted.length
          } yield (sorted(i) + "+" + sorted(j), friend)
        }
        // user pair -> comma-joined list of common friends
        .reduceByKey(_ + "," + _)
        .foreach { case (pair, commonFriends) =>
          println(pair + "\t" + commonFriends)
        }
    } finally {
      // Always release the local Spark resources, even if the job fails.
      ss.stop()
    }
  }
}