SparkStreaming之滑动窗口的实现

import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.log4j.{Logger,Level}
/**
 * Sliding-window word count over a socket text stream using Spark Streaming.
 *
 * Lines are read from the TCP source `hadoop02:9999`, split on commas, and the
 * per-word counts for the most recent 6 seconds are printed every 4 seconds.
 */
object Window {
  /**
   * Program entry point: builds the streaming pipeline, starts it, and blocks
   * until the streaming context terminates.
   *
   * @param args command-line arguments (currently unused)
   */
  def main(args: Array[String]): Unit = {
    // Lower Spark's log level *before* the context is created so that the
    // start-up INFO output is suppressed as well.
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)

    // Master is "local[2]"; NOTE(review): a socket receiver is expected to
    // occupy one thread, leaving the other for processing — confirm against
    // the Spark Streaming guide before lowering this.
    val conf = new SparkConf().setAppName("ReduceByKeyAndWindow").setMaster("local[2]")
    // Batch interval of 1 second.
    val ssc = new StreamingContext(conf, Seconds(1))

    try {
      val inputDStream: ReceiverInputDStream[String] = ssc.socketTextStream("hadoop02", 9999)

      // "a,b,a" -> ("a",1), ("b",1), ("a",1). Empty tokens are dropped because
      // a blank input line would otherwise be counted as the word "".
      val wordOneDStream: DStream[(String, Int)] =
        inputDStream
          .flatMap(_.split(","))
          .filter(_.nonEmpty)
          .map((_, 1))

      /*
        reduceByKeyAndWindow(
          reduceFunc: (V, V) => V,
          windowDuration: Duration,
          slideDuration: Duration)

        Every 4 seconds, count the words received during the last 6 seconds.
        Per the Spark Streaming documentation, both durations must be multiples
        of the batch interval (1 second here).

        The original author noted that the `_ + _` shorthand did not work here
        (presumably a type-inference problem caused by the method's overloads),
        so the parameter types are spelled out explicitly.
       */
      val resultDStream: DStream[(String, Int)] =
        wordOneDStream.reduceByKeyAndWindow((x: Int, y: Int) => x + y, Seconds(6), Seconds(4))
      resultDStream.print()

      ssc.start()
      ssc.awaitTermination()
    } finally {
      // Always release the streaming context (and its receiver), even when
      // start() or awaitTermination() throws.
      ssc.stop()
    }
  }
}

你可能感兴趣的:(Spark学习随笔)