Spark 分区间排序求 TopN 问题

Scala 利用 TreeMap 进行排序求 TopN 的方法:

 /**
  * Returns the `topN` records with the highest counts from one iterator
  * (e.g. one partition), ordered by count descending.
  *
  * Each input record is `((_, tea), count)`; only the second component of the
  * key (`tea`) and the count are used. Records with equal counts keep their
  * arrival order, and when the result has to be trimmed the later arrival is
  * dropped first. Every record is ranked independently, so an input that
  * repeats the same `tea` should be aggregated by the caller beforehand.
  *
  * At most `topN + 1` entries are held in memory at any time.
  *
  * @param itm  records to rank; consumed completely
  * @param topN maximum number of records to return; values <= 0 yield an empty result
  * @return iterator over `(tea, count)` pairs, highest count first
  */
 def sortedByTeaAndCount(itm: Iterator[((String, String), Int)], topN: Int): Iterator[(String, Int)] = {
   // Rank key is (count, arrival index). The arrival index makes every key
   // unique, so the ordering is total and returns 0 only for the same record;
   // the sorted map can therefore locate and remove entries reliably.
   val rank: Ordering[(Int, Int)] = new Ordering[(Int, Int)] {
     override def compare(a: (Int, Int), b: (Int, Int)): Int = {
       val byCountDesc = Integer.compare(b._1, a._1)
       if (byCountDesc != 0) byCountDesc else Integer.compare(a._2, b._2)
     }
   }
   val empty = scala.collection.immutable.TreeMap.empty[(Int, Int), String](rank)

   val best = itm.zipWithIndex.foldLeft(empty) { case (acc, (((_, tea), count), idx)) =>
     val grown = acc.updated((count, idx), tea)
     // Only one entry was added, so evicting the single worst one restores the bound.
     if (grown.size > topN) grown - grown.lastKey else grown
   }

   best.iterator.map { case ((count, _), tea) => (tea, count) }
 }

也可参考:(自定义排序)
https://blog.csdn.net/wt346326775/article/details/72844126

你可能感兴趣的:(spark)