(exam room, class, student ID) -> sort by exam room ascending, class ascending, student ID descending. Input data:
1 1 3
1 1 4
1 2 8
1 3 7
3 2 9
3 5 11
1 4 13
1 5 12
2 1 14
2 1 10
2 4 1
2 3 5
2 4 6
3 5 2
3 2 15
1 1 16
2 2 17
3 3 18
2 2 19
3 3 20
package com.spark.sort

import org.apache.spark.{SparkConf, SparkContext}

import scala.reflect.ClassTag

/**
  * Created by wqh on 2017/9/11.
  */
object TestsortBy extends App {
  val conf = new SparkConf()
  conf.setAppName("TestsortBy").setMaster("local[4]")
  val sc = new SparkContext(conf)

  val rdd1 = sc.textFile("/Users/wqh/Desktop/data/s.txt", 4)
  // Split each line into a (exam room, class, student ID) tuple
  val rdd2 = rdd1.map(_.split(" ")).map(t => (t(0), t(1), t(2)))

  // Custom Ordering: exam room ascending, class ascending, student ID descending.
  // The fields are numeric, so compare them as Int rather than as String.
  val mysortBy = new Ordering[(String, String, String)] {
    override def compare(x: (String, String, String), y: (String, String, String)): Int = {
      val r = x._1.toInt.compare(y._1.toInt)
      val r2 = x._2.toInt.compare(y._2.toInt)
      if (r == 0) {
        if (r2 == 0) y._3.toInt - x._3.toInt else r2
      } else r
    }
  }

  // sortBy takes the Ordering and ClassTag as a second parameter list,
  // so the custom Ordering can be passed explicitly:
  val rdd3 = rdd2.sortBy(x => x)(mysortBy, ClassTag[(String, String, String)](classOf[(String, String, String)]))
  rdd3.collect().foreach(println)
}
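A variation (a sketch, not part of the original code): if the three fields are parsed to Int and the Ordering is declared implicit, sortBy resolves it on its own and the explicit (Ordering, ClassTag) argument list is no longer needed. The object name TestsortByImplicit is made up; the path and data layout are assumed to be the same s.txt as above.

package com.spark.sort

import org.apache.spark.{SparkConf, SparkContext}

object TestsortByImplicit extends App {
  val sc = new SparkContext(new SparkConf().setAppName("TestsortByImplicit").setMaster("local[4]"))

  // Parse the fields to Int up front so the comparison is numeric
  val rows = sc.textFile("/Users/wqh/Desktop/data/s.txt", 4)
    .map(_.split(" "))
    .map(t => (t(0).toInt, t(1).toInt, t(2).toInt))

  // Exam room asc, class asc, student ID desc
  implicit val ord: Ordering[(Int, Int, Int)] = new Ordering[(Int, Int, Int)] {
    override def compare(x: (Int, Int, Int), y: (Int, Int, Int)): Int = {
      val r = x._1.compare(y._1)
      val r2 = x._2.compare(y._2)
      if (r != 0) r else if (r2 != 0) r2 else y._3.compare(x._3) // ID descending
    }
  }

  // With an implicit Ordering in scope, sortBy needs no explicit second parameter list
  rows.sortBy(x => x).collect().foreach(println)
}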
package com.spark.sort

import org.apache.spark.{SparkConf, SparkContext}

/**
  * Created by wqh on 2017/9/11.
  */
object TestsortByKey extends App {
  val conf = new SparkConf()
  conf.setAppName("TestsortByKey").setMaster("local[4]")
  val sc = new SparkContext(conf)

  val rdd1 = sc.textFile("/Users/wqh/Desktop/data/s.txt", 4)
  // Build (key, value) pairs; the key fields are parsed to Int so that the
  // implicit Ordering[(Int, Int, Int)] below is the one sortByKey resolves.
  // (With String keys, sortByKey would fall back to the default lexicographic
  // tuple ordering and silently ignore the custom one.)
  val rdd2 = rdd1.map(_.split(" ")).map(t => ((t(0).toInt, t(1).toInt, t(2).toInt), 1))

  // Implicit Ordering on the key: exam room asc, class asc, student ID desc
  implicit val mysort: Ordering[(Int, Int, Int)] = new Ordering[(Int, Int, Int)] {
    override def compare(x: (Int, Int, Int), y: (Int, Int, Int)): Int = {
      val r = x._1.compare(y._1)
      val r2 = x._2.compare(y._2)
      if (r == 0) {
        if (r2 == 0) y._3 - x._3 else r2
      } else r
    }
  }

  val rdd3 = rdd2.sortByKey().collect()
  for ((k, v) <- rdd3) { println(k) }
  // rdd3.keys.foreach(println) does not compile: rdd3 is an Array after collect(), not an RDD
}
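The custom Ordering can also be sanity-checked locally without Spark, since Scala's sorted uses the same implicit. The check object below (OrderingCheck, not part of the original code) reuses a few rows from s.txt above.

package com.spark.sort

object OrderingCheck extends App {
  // Same comparison rule as mysort in TestsortByKey
  implicit val mysort: Ordering[(Int, Int, Int)] = new Ordering[(Int, Int, Int)] {
    override def compare(x: (Int, Int, Int), y: (Int, Int, Int)): Int = {
      val r = x._1.compare(y._1)
      val r2 = x._2.compare(y._2)
      if (r != 0) r else if (r2 != 0) r2 else y._3.compare(x._3) // student ID descending
    }
  }

  val sample = Seq((1, 1, 3), (1, 1, 4), (1, 1, 16), (2, 1, 14), (2, 1, 10))
  // Expected: (1,1,16), (1,1,4), (1,1,3), (2,1,14), (2,1,10)
  sample.sorted.foreach(println)
}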
Secondary sort with sortByKey: rebuild the fields to be sorted into the key, and keep the whole line as the value.
package com.spark.sort

import org.apache.spark.{SparkConf, SparkContext}

/**
  * Created by wqh on 2017/9/11.
  */
object SecondarySort extends App {
  val conf = new SparkConf()
  conf.setAppName("SecondarySort").setMaster("local[4]")
  val sc = new SparkContext(conf)

  val rdd1 = sc.textFile("/Users/wqh/Desktop/data/s.txt", 4)
  // Wrap the three fields in a SecondarySortKey; keep the whole line as the value
  val rdd2 = rdd1.map(line => {
    val r = line.split(" ")
    val key = new SecondarySortKey(r(0).toInt, r(1).toInt, r(2).toInt)
    (key, line)
  })

  // Sort by the custom key, then drop the key and keep only the original lines
  val res = rdd2.sortByKey().map(t => t._2)
  res.collect().foreach(println)
}
The reconstructed key class, SecondarySortKey:
package com.spark.sort

/**
  * Created by wqh on 2017/9/12.
  */
// Key for the secondary sort. It must be Serializable because keys are shipped
// between executors during the shuffle; Ordered supplies the Ordering that
// sortByKey needs.
class SecondarySortKey(val first: Int, val second: Int, val third: Int)
  extends Ordered[SecondarySortKey] with Serializable {

  // first asc, second asc, third desc
  override def compare(other: SecondarySortKey): Int = {
    val r = first.compare(other.first)
    val r2 = second.compare(other.second)
    if (r == 0) {
      if (r2 == 0) other.third - this.third else r2
    } else r
  }
}
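Because SecondarySortKey extends Ordered, it also works directly with Scala's sorted on a local collection, which is a quick way to verify the compare logic. The check object below (SecondarySortKeyCheck) is illustrative only; the sample rows come from s.txt above.

package com.spark.sort

object SecondarySortKeyCheck extends App {
  val keys = Seq(
    new SecondarySortKey(1, 1, 3),
    new SecondarySortKey(1, 1, 16),
    new SecondarySortKey(2, 1, 10),
    new SecondarySortKey(1, 2, 8)
  )
  // Expected order: (1,1,16), (1,1,3), (1,2,8), (2,1,10)
  keys.sorted.foreach(k => println(s"${k.first} ${k.second} ${k.third}"))
}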
Sorted output (exam room asc, class asc, student ID desc):
1 1 16
1 1 4
1 1 3
1 2 8
1 3 7
1 4 13
1 5 12
2 1 14
2 1 10
2 2 19
2 2 17
2 3 5
2 4 6
2 4 1
3 2 15
3 2 9
3 3 20
3 3 18
3 5 11
3 5 2