spark-core求共同好友

spark-core求共同好友

数据样式

A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J

代码

  • 第一遍以好友为 key 聚合,得到每个好友分别被哪些用户所拥有。
  • 第二遍以用户两两组成的用户对为 key 聚合,得到每对用户的共同好友。
package com.caiw.sparkcore

import org.apache.spark.sql.SparkSession

import scala.collection.immutable.HashMap
import scala.collection.mutable

object ShareFriend {

  /**
   * Computes the common friends of every pair of users.
   *
   * Input file format: one line per user, `user:friend1,friend2,...`
   * (e.g. `A:B,C,D,F,E,O` means user A has friends B, C, D, F, E, O).
   *
   * Pass 1: invert the relation to (friend -> all users that list this friend).
   * Pass 2: for every pair of users sharing a friend, emit
   * (userX+userY -> friend) and aggregate all shared friends per pair.
   */
  def main(args: Array[String]): Unit = {
    val ss = SparkSession.builder().appName("shareFriend").master("local[1]").getOrCreate()

    ss.sparkContext
      .textFile("spark/src/main/resources/friend.txt")
      .map(_.split(":"))
      .flatMap { parts =>
        val user = parts(0)
        // Emit (friend, user) for each friend of this user; `distinct` keeps
        // the original Map-based dedup behavior if a line repeats a friend.
        parts(1).split(",").distinct.map(friend => friend -> user)
      }
      // friend -> comma-joined list of users that have this friend
      .reduceByKey(_ + "," + _)
      .flatMap { case (friend, userCsv) =>
        // BUG FIX: sort the users so each pair is always keyed in one
        // canonical order ("A+B", never "B+A"). Without the sort the user
        // list order depends on reduce order, and the same pair could be
        // split across two different keys, losing common friends.
        val users = userCsv.split(",").sorted
        for {
          i <- users.indices
          j <- (i + 1) until users.length
        } yield users(i) + "+" + users(j) -> friend
      }
      // userPair -> comma-joined list of their common friends
      .reduceByKey(_ + "," + _)
      .foreach { case (pair, commonFriends) =>
        println(pair + "\t" + commonFriends)
      }

    // Release Spark resources; the original driver never stopped the session.
    ss.stop()
  }
}

你可能感兴趣的:(spark-core求共同好友)