使用 Scala 集合库中的 combinations 方法,将一个序列中的元素两两组合,生成所有二元组合的集合(下面的示例使用整数列表,每个组合是含两个元素的 List)。
Spark 版本:2.0.2
// spark-shell session (recorded with Spark 2.0.2): build every 2-element
// combination of a sequence with `combinations(2)`, first on a local List and
// then for each record of an RDD.
// `sc` is the SparkContext that spark-shell provides.
// Every statement is on its own line (Scala needs a newline or `;` between
// statements); the block comment after a statement is the REPL output
// recorded for it.

// --- Local collection ---------------------------------------------------

val list1 = List(1, 3, 2, 4, 5, 7, 6, 9, 8)
/* list1: List[Int] = List(1, 3, 2, 4, 5, 7, 6, 9, 8) */

// Sort before combining so that each pair comes out in ascending order.
// `combinations` returns an Iterator: it can be traversed only once, and the
// `foreach` below consumes it.
val list2 = list1.sorted.combinations(2)
/* list2: Iterator[List[Int]] = non-empty iterator */

list2.foreach(println)
/*
List(1, 2)
List(1, 3)
List(1, 4)
List(1, 5)
List(1, 6)
List(1, 7)
List(1, 8)
List(1, 9)
List(2, 3)
List(2, 4)
List(2, 5)
List(2, 6)
List(2, 7)
List(2, 8)
List(2, 9)
List(3, 4)
List(3, 5)
List(3, 6)
List(3, 7)
List(3, 8)
List(3, 9)
List(4, 5)
List(4, 6)
List(4, 7)
List(4, 8)
List(4, 9)
List(5, 6)
List(5, 7)
List(5, 8)
List(5, 9)
List(6, 7)
List(6, 8)
List(6, 9)
List(7, 8)
List(7, 9)
List(8, 9)
*/

// --- RDD: one record per input sequence ---------------------------------

val rdd1 = sc.parallelize(Seq(Seq(1, 2, 3, 4, 5), Seq(2, 1, 4, 5, 6, 9), Seq(1, 6, 3, 7, 2, 4)))
/* rdd1: org.apache.spark.rdd.RDD[Seq[Int]] = ParallelCollectionRDD[0] at parallelize at <console>:24 */

rdd1.collect()
/* res1: Array[Seq[Int]] = Array(List(1, 2, 3, 4, 5), List(2, 1, 4, 5, 6, 9), List(1, 6, 3, 7, 2, 4)) */

// flatMap flattens the per-record pair iterators into one RDD of pairs.
// Sorting each record first means the same two values always yield the same
// pair, e.g. the record starting with 2, 1 still produces List(1, 2).
val rdd2 = rdd1.flatMap(x => x.sorted.combinations(2))
/* rdd2: org.apache.spark.rdd.RDD[Seq[Int]] = MapPartitionsRDD[1] at flatMap at <console>:26 */

// collect() returns the pairs grouped by input record, in record order.
rdd2.collect()
/*
res2: Array[Seq[Int]] = Array(
  List(1, 2), List(1, 3), List(1, 4), List(1, 5), List(2, 3), List(2, 4), List(2, 5), List(3, 4),
  List(3, 5), List(4, 5),
  List(1, 2), List(1, 4), List(1, 5), List(1, 6), List(1, 9), List(2, 4), List(2, 5), List(2, 6),
  List(2, 9), List(4, 5), List(4, 6), List(4, 9), List(5, 6), List(5, 9), List(6, 9),
  List(1, 2), List(1, 3), List(1, 4), List(1, 6), List(1, 7), List(2, 3), List(2, 4), List(2, 6),
  List(2, 7), List(3, 4), List(3, 6), List(3, 7), List(4, 6), List(4, 7), List(6, 7))
*/

// NOTE(review): unlike collect(), the output recorded below interleaves the
// pairs of the three input records, so do not rely on its order. The println
// runs inside the Spark tasks; per the Spark programming guide, on a cluster
// this goes to the executors' stdout rather than the driver console. Use
// rdd2.collect().foreach(println) when ordered, driver-side output is needed.
rdd2.foreach(println)
/*
List(1, 2)
List(1, 2)
List(1, 3)
List(1, 2)
List(1, 4)
List(1, 3)
List(1, 4)
List(1, 6)
List(1, 7)
List(1, 4)
List(2, 3)
List(1, 5)
List(2, 4)
List(1, 5)
List(2, 6)
List(1, 6)
List(2, 3)
List(2, 7)
List(1, 9)
List(2, 4)
List(3, 4)
List(2, 4)
List(2, 5)
List(3, 6)
List(2, 5)
List(3, 4)
List(3, 7)
List(2, 6)
List(3, 5)
List(4, 6)
List(4, 5)
List(2, 9)
List(4, 7)
List(4, 5)
List(6, 7)
List(4, 6)
List(4, 9)
List(5, 6)
List(5, 9)
List(6, 9)
*/