Flink consuming from Kafka and writing to HDFS (code only)
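
The job below uses the Kafka 0.8 consumer (FlinkKafkaConsumer08) and BucketingSink, so it targets a pre-1.9 Flink release. A minimal build.sbt sketch for the required connectors, assuming Flink 1.6.1 (the version is an assumption; match it to your cluster):

// build.sbt -- the Flink version here is an assumption
libraryDependencies ++= Seq(
  "org.apache.flink" %% "flink-streaming-scala"      % "1.6.1" % "provided",
  "org.apache.flink" %% "flink-connector-kafka-0.8"  % "1.6.1",
  "org.apache.flink" %% "flink-connector-filesystem" % "1.6.1"
)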

import java.util.Properties
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08
import org.apache.flink.streaming.connectors.fs.StringWriter
import org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink

object FlinkHDFS {
  // brings the implicit TypeInformation for the Scala DataStream API into scope
  import org.apache.flink.streaming.api.scala._
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    val properties = new Properties()
    // Kafka broker and ZooKeeper addresses (the 0.8 consumer needs both);
    // multiple comma-separated hosts can be listed
    properties.setProperty("bootstrap.servers", "node2:9092")
    properties.setProperty("zookeeper.connect", "node2:2181")
    properties.setProperty("group.id", "haha") // consumer group id; any value works for this demo
    // consume the "test" topic as plain strings
    val test: DataStream[String] = env.addSource(
      new FlinkKafkaConsumer08[String]("test", new SimpleStringSchema(), properties))
    test.print() // echo records to stdout for debugging


    // write the stream to HDFS with a BucketingSink
    val sink = new BucketingSink[String]("hdfs://node01:9000/flink/output")
    sink.setWriter(new StringWriter[String]())
      .setBatchSize(20)               // roll a new part file after 20 bytes (tiny, for demo purposes)
      .setBatchRolloverInterval(2000) // ...or after 2 seconds, whichever comes first
    test.addSink(sink)

    env.execute("Hdfs")
  }
}
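
BucketingSink was deprecated in later Flink releases in favor of StreamingFileSink. For newer versions, a minimal sketch of the equivalent sink (same HDFS path as above; note that StreamingFileSink only finalizes part files on checkpoints, so checkpointing must be enabled):

import org.apache.flink.api.common.serialization.SimpleStringEncoder
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink

// enable checkpointing so in-progress files get committed, e.g.:
// env.enableCheckpointing(10000)
val fileSink: StreamingFileSink[String] = StreamingFileSink
  .forRowFormat(new Path("hdfs://node01:9000/flink/output"),
                new SimpleStringEncoder[String]("UTF-8"))
  .build()
test.addSink(fileSink)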
