Reading Data in Flink [Boxuegu Learning Notes]

1. Reading data from a collection

import org.apache.flink.streaming.api.scala._


object SourceList {

  def main(args: Array[String]): Unit = {
    // 1. Create the execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // 2. Read data from a collection
    val sensorDS: DataStream[WaterSensor] = env.fromCollection(
      // a simple collection such as List(1, 2, 3, 4, 5) would also work here
      List(
        WaterSensor("ws_001", 1577844001, 45.0),
        WaterSensor("ws_002", 1577844015, 43.0),
        WaterSensor("ws_003", 1577844020, 42.0)
      )
    )
    // 3. Print the stream
    sensorDS.print()
    // 4. Trigger execution
    env.execute("sensor")

  }

  // sample data type: sensor id, epoch timestamp, water level
  case class WaterSensor(id: String, ts: Long, vc: Double)
}
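Besides fromCollection, the environment also provides fromElements, which takes the elements directly without building a List first. A minimal sketch (the object name SourceElements is just for illustration):

import org.apache.flink.streaming.api.scala._

object SourceElements {

  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // pass the elements directly instead of wrapping them in a collection
    val sensorDS: DataStream[WaterSensor] = env.fromElements(
      WaterSensor("ws_001", 1577844001, 45.0),
      WaterSensor("ws_002", 1577844015, 43.0)
    )
    sensorDS.print()
    env.execute("sensor")
  }

  case class WaterSensor(id: String, ts: Long, vc: Double)
}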

2. Reading data from a file

import org.apache.flink.streaming.api.scala._


object SourceFile {

  def main(args: Array[String]): Unit = {
    // 1. Create the execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // 2. Read data from the given path
    val fileDS: DataStream[String] = env.readTextFile("input/data.log")

    // 3. Print the stream
    fileDS.print()

    // 4. Trigger execution
    env.execute("sensor")

  }
}
/**
 * When reading a file, the path may point to a directory or to a single file.
 * If a relative path is used, it is resolved against the system property user.dir:
 * System.getProperty("user.dir")
 */
/**
 * When the code runs inside IDEA, user.dir automatically points to the project root;
 * on a standalone cluster it defaults to the root directory of the cluster node.
 * Besides relative paths, the path can also point to a distributed file system
 * such as HDFS:
 * val fileDS: DataStream[String] =
 *   env.readTextFile("hdfs://hadoop02:9000/test/1.txt")
 */
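readTextFile yields the file contents as one String per line. Assuming the lines in input/data.log are CSV in the form id,ts,vc (a hypothetical format, e.g. ws_001,1577844001,45.0), they can be parsed into WaterSensor objects with a map. A minimal sketch under that assumption:

import org.apache.flink.streaming.api.scala._

object SourceFileParse {

  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // assumes each line of input/data.log is CSV: id,ts,vc (hypothetical format)
    val sensorDS: DataStream[WaterSensor] = env
      .readTextFile("input/data.log")
      .map { line =>
        val fields = line.split(",")
        WaterSensor(fields(0).trim, fields(1).trim.toLong, fields(2).trim.toDouble)
      }

    sensorDS.print()
    env.execute("sensor")
  }

  case class WaterSensor(id: String, ts: Long, vc: Double)
}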

3. Reading data from Kafka

import java.util.Properties

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011
import org.apache.flink.api.common.serialization.SimpleStringSchema


object SourceKafka {

  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment =
      StreamExecutionEnvironment.getExecutionEnvironment

    val properties = new Properties()
    // Kafka broker address
    properties.setProperty("bootstrap.servers", "hadoop02:9092")
    // consumer group id
    properties.setProperty("group.id", "consumer-group")
    // deserializers for record keys and values
    properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    // start from the latest offset when no committed offset exists
    properties.setProperty("auto.offset.reset", "latest")

    // consume the "sensor" topic as a stream of strings
    val kafkaDS: DataStream[String] = env.addSource(
      new FlinkKafkaConsumer011[String](
        "sensor",
        new SimpleStringSchema(),
        properties)
    )
    kafkaDS.print()
    env.execute("sensor")
  }
}
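Besides the auto.offset.reset property, the start position can also be set on the connector itself via the consumer's setStartFrom* methods, which then ignore the committed group offsets. A minimal sketch reusing the same broker and topic:

import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011

object SourceKafkaFromEarliest {

  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "hadoop02:9092")
    properties.setProperty("group.id", "consumer-group")

    val consumer = new FlinkKafkaConsumer011[String]("sensor", new SimpleStringSchema(), properties)
    // read the topic from the beginning, ignoring committed group offsets
    consumer.setStartFromEarliest()
    // alternatives: setStartFromLatest(), or setStartFromGroupOffsets() (the default)

    env.addSource(consumer).print()
    env.execute("sensor")
  }
}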
