$ import scala.io.Source
$ val lines = Source.fromFile("/home/badou/Documents/code/mr/mr_wc/The_Man_of_Property.txt").getLines().toList
lines: List[String] = List(Preface, “The.....
$ lines.flatMap(_.split(" ")).map(x=>(x,1))
List[(String, Int)] = List((Preface,1), (“The,1), (....
$ lines.flatMap(_.split(" ")).map(x=>(x,1)).groupBy(_._1)
scala.collection.immutable.Map[String,List[(String, Int)]] = Map(unlikely. -> List((unlikely.,1)), .......
$ lines.flatMap(_.split(" ")).map(x=>(x,1)).groupBy(_._1).map(x=>(x._1,x._2.length))
scala.collection.immutable.Map[String,Int] = Map(unlikely. -> 1, come? -> 1, unexpectedly; -> 1,.....
<==> $ lines.flatMap(_.split(" ")).map(x=>(x,1)).groupBy(_._1).map(x=>(x._1,x._2.size))
<==> $ lines.flatMap(_.split(" ")).map(x=>(x,1)).groupBy(_._1).map(x=>(x._1,x._2.map(_._2).sum))
以上三种写法(x._2.length、x._2.size、x._2.map(_._2).sum)输出都一样
排序(方法1) sortBy(默认升序)
$ lines.flatMap(_.split(" ")).map(x=>(x,1)).groupBy(_._1).map(x=>(x._1,x._2.map(_._2).sum)).toList.sortBy(_._2)
List[(String, Int)] = List((unlikely.,1), (come?,1), (unexpectedly;,1), (easel.,1), (hand’,1), (buns,1), ....
降序排序
$ lines.flatMap(_.split(" ")).map(x=>(x,1)).groupBy(_._1).map(x=>(x._1,x._2.map(_._2).sum)).toList.sortBy(_._2).reverse
List[(String, Int)] = List((the,5144), (of,3407), (to,2782), (and,2573), (a,2543), (he,2139), (his,1912),
取topN
$ lines.flatMap(_.split(" ")).map(x=>(x,1)).groupBy(_._1).map(x=>(x._1,x._2.map(_._2).sum)).toList.sortBy(_._2).reverse.slice(0,10)
List[(String, Int)] = List((the,5144), (of,3407), (to,2782), (and,2573), (a,2543), (he,2139), (his,1912), (was,1702), (in,1694), (had,1526))
排序(方法2) sortWith
$ lines.flatMap(_.split(" ")).map(x=>(x,1)).groupBy(_._1).map(x=>(x._1,x._2.map(_._2).sum)).toArray.sortWith(_._2>_._2).slice(0,10)
Array[(String, Int)] = Array((the,5144), (of,3407), (to,2782), (and,2573), (a,2543), (he,2139), (his,1912), (was,1702), (in,1694), (had,1526))
排序(方法2) toList和toArray排序结果一样(只是返回的集合类型分别为List和Array)
$ lines.flatMap(_.split(" ")).map(x=>(x,1)).groupBy(_._1).map(x=>(x._1,x._2.map(_._2).sum)).toList.sortWith(_._2>_._2).slice(0,10)
List[(String, Int)] = List((the,5144), (of,3407), (to,2782), (and,2573), (a,2543), (he,2139), (his,1912), (was,1702), (in,1694), (had,1526))
排序(方法3) mapValues()
$ lines.flatMap(_.split(" ")).map(x=>(x,1)).groupBy(_._1).mapValues(_.size).toArray.sortWith(_._2>_._2).slice(0,10)
Array[(String, Int)] = Array((the,5144), (of,3407), (to,2782), (and,2573), (a,2543), (he,2139), (his,1912), (was,1702), (in,1694), (had,1526))
sortWith详解:
$ val b = Array(("a",1),("b",2))
输出:b: Array[(String, Int)] = Array((a,1), (b,2))
$ b.sortWith((t1,t2)=>(t1._2>t2._2))
输出:Array[(String, Int)] = Array((b,2), (a,1))
tuple按照第二个值排序,对于传入的两个tuple,t1和t2,比较第二个值的大小,第二个值大的tuple排在前面。
简写: $ b.sortWith(_._2>_._2)
输出:Array[(String, Int)] = Array((b,2), (a,1))