Spark Streaming--2 Custom Data Source

A custom data source is defined by extending Receiver and implementing the onStart and onStop methods. onStart is expected to return quickly, so the actual read loop runs on a separate thread and hands each line to Spark through the store method.

You need to open a socket yourself and then type content into it; netcat can be used for this:

nc -lk master 8888
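Once the streaming job below is running, type a few lines into the nc session; each line is pushed to the receiver. Illustrative input (any text works):

hello spark hello
spark streaming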
package com.jiangnan.spark
import java.io.{BufferedReader, InputStreamReader}
import java.net.Socket
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.receiver.Receiver
class TestSparkStreamCustomReceiver(host: String, port: Int) extends Receiver[String](StorageLevel.DISK_ONLY) {
  //Called when the receiver starts; it must not block, so the reading happens on its own thread
  override def onStart(): Unit = {
    println("started")
    new Thread("Socket Receiver") {
      override def run(): Unit = receive()
    }.start()
  }

  //Connect to the socket and hand every line it produces to Spark
  private def receive(): Unit = {
    try {
      //Create a socket connection to the data source
      val socket = new Socket(host, port)
      val reader = new BufferedReader(new InputStreamReader(socket.getInputStream))
      //Read the socket's input stream line by line
      var line = reader.readLine()
      while (!isStopped() && line != null) {
        //If data was received, save it with the store method inherited from Receiver
        store(line)
        //Continue with the next line
        line = reader.readLine()
      }
      reader.close()
      socket.close()
    } catch {
      case e: Exception =>
        //Ask Spark to restart the receiver if the connection breaks
        restart("Error receiving data from " + host + ":" + port, e)
    }
  }

  //Called when the receiver stops
  override def onStop(): Unit = {
    println("stopped")
  }
}
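onStart returns as soon as the reading thread has been launched, as the Receiver contract expects; that thread keeps calling store for every line until the receiver is stopped or the stream ends, and restart asks Spark to re-create the receiver if the connection fails.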


object TestSparkStreamCustomReceiver extends App{
  //Configuration object
  val conf = new SparkConf().setAppName("TestSparkStreamCustomReceiver").setMaster("local[2]")
  //Create the StreamingContext with a 5-second batch interval
  val ssc = new StreamingContext(conf, Seconds(5))
  //Receive data from the socket through the custom receiver
  val lineDStream = ssc.receiverStream(new TestSparkStreamCustomReceiver("master", 8888))
  //Word count
  val res = lineDStream.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)

  res.print()
  //Start the streaming computation
  ssc.start()
  ssc.awaitTermination()
}
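With the illustrative input above arriving within one batch, every 5-second batch prints a word count to the console, roughly like this (the exact timestamp and counts depend on what you type):

-------------------------------------------
Time: 1620000000000 ms
-------------------------------------------
(hello,2)
(spark,2)
(streaming,1)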

 
