Flink 数据读取api

读取数据

准备工作:创建一个样例类(case class)用于封装传感器数据

/**
 * Immutable record describing a single sensor measurement.
 *
 * @param id          identifier of the sensor that produced the reading
 * @param timestamp   time of the reading as a Long (unit is not fixed by this definition)
 * @param temperrture temperature value of the reading; the field name's spelling is
 *                    kept unchanged so existing callers keep compiling
 */
case class SensorReading(
  id: String,
  timestamp: Long,
  temperrture: Double
)

1.从集合中读取数据

/**
 * Demo job: builds a stream from an in-memory list of sensor readings,
 * prints every element and then executes the job.
 */
object SourceDemo {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // 1. Read data from an in-memory collection
    val dataStream1: DataStream[SensorReading] = env.fromCollection(List(
      SensorReading("Test_1", 154789, 35.1547924),
      SensorReading("Test_2", 164789, 38.1547924),
      SensorReading("Test_3", 184789, 25.1547924),
      SensorReading("Test_4", 154789, 45.1547924)
    ))

    // Print the stream defined above; the previous code referenced an undefined
    // `dataSet`, which does not compile.
    dataStream1.print("dataStream1").setParallelism(1)
    env.execute("Execute SouceDemo")
  }
}

2.从文件中读取数据

    // 2. Read data from a text file as a stream of Strings
    val dataStream2: DataStream[String] = env.readTextFile("E:\\Vocaboly\\ideaIU\\IDEAJOB\\flinkdemo\\src\\main\\resources\\sensor.txt")
    // Print the stream defined above; the previous code referenced an undefined `dataSet2`.
    dataStream2.print("dataSet2").setParallelism(1)

3.从kafka中读取数据

启动 Kafka 并创建一个名为 first 的 topic
为 topic first 创建一个生产者,用于发送测试数据

// 3. Read data from Kafka
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "hdp-1:9092")
    properties.setProperty("group.id", "consumer-group")
    // A consumer is configured with deserializers; `key.serializer` / `value.serializer`
    // are producer-side settings and are not recognised by a Kafka consumer.
    properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    // Start from the latest offset when the group has no committed offset
    properties.setProperty("auto.offset.reset", "latest")

    // Consume topic "first", decoding each record value as a String
    val dataStream3: DataStream[String] = env.addSource(new FlinkKafkaConsumer[String]("first", new SimpleStringSchema(), properties))
    dataStream3.print("dataStream3").setParallelism(1)

4.自定义Source

/**
 * Custom source that emits simulated temperature readings for ten sensors
 * ("Sensor_1" to "Sensor_10"), one batch about every 500 ms, until `cancel()` is called.
 */
class SensorSource() extends SourceFunction[SensorReading]{

  // Flag telling run() whether it should keep producing data.
  // Marked volatile because cancel() may be invoked from a different thread than
  // the one executing run(), and the update must become visible to the loop.
  @volatile var running: Boolean = true

  /**
   * Emits readings in a loop while `running` is true.
   *
   * @param sourceContext context used to emit each generated SensorReading
   */
  override def run(sourceContext: SourceFunction.SourceContext[SensorReading]): Unit = {
    // Random number generator used for the initial values and for every step
    val rand = new Random()

    // Initial temperature per sensor: Gaussian noise scaled by 20 around 60
    var curTemp = (1 to 10).map(i => ("Sensor_" + i, 60 + rand.nextGaussian() * 20))

    while (running) {
      // Random-walk step. `map` returns a new collection, so the result has to be
      // assigned back; previously it was discarded and the values never changed.
      curTemp = curTemp.map { case (id, temp) => (id, temp + rand.nextGaussian()) }

      // All readings of one batch share the same timestamp
      val curTime: Long = System.currentTimeMillis()

      curTemp.foreach { case (id, temp) =>
        sourceContext.collect(SensorReading(id, curTime, temp))
      }

      // Pause between batches
      Thread.sleep(500)
    }
  }

  /**
   * Stops data generation by clearing the flag checked by the loop in run().
   * Previously this method did `return false` and never changed the flag,
   * so the source could not be stopped.
   */
  override def cancel(): Unit = {
    running = false
  }
}

读取自定义Source中随机生成的数据

    // 4. Custom source: build the stream from a SensorSource instance
    val sensorSource = new SensorSource()
    val dataStream4: DataStream[SensorReading] = env.addSource(sensorSource)
    dataStream4
      .print("dataStream4")
      .setParallelism(1)

    // Submit the job for execution
    env.execute("Execute SouceDemo")

你可能感兴趣的:(Flink)