map与flatMap的区别

spark版本:spark 2.0.2

scala版本:2.11.8

服务器版本:CentOS 6.7

对比 map 和 flatMap 在 RDD 中的使用:map 对每个元素做一对一的转换,结果的元素个数不变;flatMap 先对每个元素做转换,再把得到的各个集合展平(flatten)成一个 RDD。


// Sample RDD[String]: the same seven-word sentence repeated three times.
val rdd1 = sc.parallelize(Seq.fill(3)("one two three four five six seven"))


// map is one-to-one: each input line becomes exactly one output element,
// here the Array[String] produced by splitting that line on spaces.
rdd1.map(line => line.split(" ")).collect()
/*
res6: Array[Array[String]] = Array(Array(one, two, three, four, five, six, seven),
                                   Array(one, two, three, four, five, six, seven),
                                   Array(one, two, three, four, five, six, seven))
*/

// flatMap runs the same split, then flattens the per-line arrays, so the
// result is a single Array[String] holding every word of every line.
rdd1.flatMap(line => line.split(" ")).collect()
/*
res7: Array[String] = Array(one, two, three, four, five, six, seven,
                            one, two, three, four, five, six, seven,
                            one, two, three, four, five, six, seven)
*/


// Sample RDD[(Int, String)]: keys 1, 2 and 3, each paired with the same sentence.
val rdd2 = {
  val sentence = "one two three four five six seven"
  sc.parallelize(Seq(1, 2, 3).map(id => (id, sentence)))
}

// map over key/value pairs: the key is kept as-is and the sentence is split
// into an Array[String]; there is still one output element per input pair.
rdd2.map { case (id, text) => (id, text.split(" ")) }.collect()
/*
res14: Array[(Int, Array[String])] = Array((1,Array(one, two, three, four, five, six, seven)),
                                           (2,Array(one, two, three, four, five, six, seven)),
                                           (3,Array(one, two, three, four, five, six, seven)))
*/


// Same split as before, but the words are exposed as an Iterable[String]
// (shown as a WrappedArray in the output) instead of a raw Array.
rdd2.map { case (id, text) =>
  (id, text.split(" ").toIterable)
}.collect()
/*
res4: Array[(Int, Iterable[String])] = Array((1,WrappedArray(one, two, three, four, five, six, seven)),
                                             (2,WrappedArray(one, two, three, four, five, six, seven)),
                                             (3,WrappedArray(one, two, three, four, five, six, seven)))
*/

// Stage 1 (map): split each sentence, keeping the key       -> (id, Iterable[String]).
// Stage 2 (flatMap): pair the key with every word and flatten -> one (id, word) per word.
// Equivalent shorthand on a pair RDD: rdd2.flatMapValues(_.split(" ")).
rdd2.map { case (id, text) =>
  (id, text.split(" ").toIterable)
}.flatMap { case (id, words) =>
  words.map(word => (id, word))
}.collect()

/*
res7: Array[(Int, String)] = Array((1,one), (1,two), (1,three), (1,four), (1,five), (1,six), (1,seven),
                                   (2,one), (2,two), (2,three), (2,four), (2,five), (2,six), (2,seven),
                                   (3,one), (3,two), (3,three), (3,four), (3,five), (3,six), (3,seven))
*/

  

你可能感兴趣的:(map与flatMap的区别)