自定义分区器:
// Build a pair RDD of six (key, value) records; the second argument (3) is
// passed to makeRDD as the requested number of slices.
val rdd = sc.makeRDD(
List(
("nba", "xxxx"),
("cba", "xxxx"),
("cba", "xxxx"),
("cba", "xxxx"),
("nba", "xxxx"),
("wnba", "xxxx"),
),3
)
// Redistribute the records by key using the custom partitioner.
val rdd1: RDD[(String, String)] = rdd.partitionBy(new MyPartitioner())
// Write the repartitioned data as text under the "output" path.
rdd1.saveAsTextFile("output")
// Release the SparkContext once the job has finished.
sc.stop()
}
/**
 * Partitioner that routes the keys "nba", "cba" and "wnba" to partitions
 * 0, 1 and 2 respectively.
 *
 * Any other key falls back to a hash-based partition instead of failing with a
 * `scala.MatchError`, so one unexpected key does not abort the whole job.
 */
class MyPartitioner extends Partitioner{
  /** Total number of partitions produced by this partitioner. */
  override def numPartitions: Int = 3

  /**
   * Maps a record key to its partition index.
   *
   * @param key the record key
   * @return an index in the range `[0, numPartitions)`
   */
  override def getPartition(key: Any): Int = {
    key match {
      case "nba" => 0
      case "cba" => 1
      case "wnba" => 2
      // A null key has no hashCode to call; pin it to the first partition.
      case null => 0
      // floorMod keeps the result non-negative even for negative hash codes.
      case other => Math.floorMod(other.hashCode, numPartitions)
    }
  }

  /**
   * All instances behave identically (the class holds no state), so any two
   * instances are considered equal. This lets code that compares partitioners
   * recognise data that is already laid out by this partitioner.
   */
  override def equals(other: Any): Boolean = other match {
    case _: MyPartitioner => true
    case _ => false
  }

  /** Consistent with `equals`: every instance yields the same hash. */
  override def hashCode(): Int = numPartitions
}
自定义累加器:
// Pair RDD of (word, count) records; the second argument (2) is passed to
// makeRDD as the requested number of slices.
val rdd = sc.makeRDD(List(("Hello", 1), ("Hello", 2), ("Hello", 4)),2)
// Create the custom accumulator and register it with the SparkContext under
// the name "WordCountAcc".
val wordCount = new WordCountAcc()
sc.register(wordCount,"WordCountAcc")
// Feed every (word, count) record into the accumulator.
rdd.foreach(
t=>{
wordCount.add(t)
}
)
// Print the accumulated word -> total map.
println(wordCount.value)
// Release the SparkContext once the job has finished.
sc.stop()
}
/**
 * Accumulator that sums counts per word.
 *
 * Input elements are `(word, count)` pairs; the accumulated value is a mutable
 * map from each word to the total of all counts added for it.
 */
class WordCountAcc extends AccumulatorV2[(String,Int),mutable.Map[String,Int]]{
  // Running totals, keyed by word.
  private val wordCountMap = mutable.Map[String,Int]()

  /** True when no word has been accumulated yet. */
  override def isZero: Boolean = wordCountMap.isEmpty

  /**
   * Creates a new accumulator holding a snapshot of the current totals.
   *
   * The entries are copied into the new instance's own map, so later updates
   * to either accumulator do not affect the other.
   */
  override def copy(): AccumulatorV2[(String, Int), mutable.Map[String, Int]] = {
    val duplicate = new WordCountAcc()
    duplicate.wordCountMap ++= wordCountMap
    duplicate
  }

  /** Discards all accumulated totals, returning this accumulator to its zero state. */
  override def reset(): Unit = wordCountMap.clear()

  /**
   * Adds one `(word, count)` observation to the running total of that word.
   *
   * @param v the word and the amount to add to its total
   */
  override def add(v: (String, Int)): Unit = {
    val (word,cnt) = v
    val oldcnt: Int = wordCountMap.getOrElse(word,0)
    wordCountMap.update(word,cnt+oldcnt)
  }

  /**
   * Folds the totals of another accumulator into this one, summing the counts
   * of words present in both.
   *
   * @param other the accumulator whose value is merged into this one
   */
  override def merge(other: AccumulatorV2[(String, Int), mutable.Map[String, Int]]): Unit = {
    val otherMap: mutable.Map[String, Int] = other.value
    otherMap.foreach{
      case (word,cnt)=>{
        val oldcount: Int = this.wordCountMap.getOrElse(word,0)
        this.wordCountMap.update(word,cnt+oldcount)
      }
    }
  }

  /** The current word -> total map (the live internal map, not a copy). */
  override def value: mutable.Map[String, Int] = wordCountMap
}