Read data from Kafka, transform it, and write the result to a file. Method 1 uses StreamingFileSink; method 2 (further below) uses the newer FileSink.
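All of the examples in this post reference a SensorReading case class defined in an earlier part of this series; a minimal sketch of the assumed shape (field names and types inferred from how it is constructed below) looks like this:

// Assumed shape only: sensor id, event timestamp, and a temperature value
case class SensorReading(id: String, timestamp: Long, temperature: Double)

The StreamingFileSink version: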
package com.hjt.yxh.hw.apitest

import org.apache.flink.api.common.eventtime.WatermarkStrategy
import org.apache.flink.api.common.serialization.{SimpleStringEncoder, SimpleStringSchema}
import org.apache.flink.connector.kafka.source.KafkaSource
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

object FileSinkTest {
  def main(args: Array[String]): Unit = {
    // Create the execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Add a source that reads from Kafka; the data is transformed and then written to a file
    val kafkaSource: KafkaSource[String] = KafkaSource.builder[String]()
      .setBootstrapServers("192.168.0.52:9092,192.168.0.109:9092,192.168.0.115:9092")
      .setTopics("sensor")
      .setValueOnlyDeserializer(new SimpleStringSchema())
      .setGroupId("myGroup")
      .setStartingOffsets(OffsetsInitializer.latest())
      .build()

    val dataStream = env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "kafkaSource")
    val sensorStream = dataStream.map(data => {
      val arry = data.split(",")
      SensorReading(arry(0), arry(1).toLong, arry(2).toDouble)
    })
    sensorStream.print()

    sensorStream.addSink(
      StreamingFileSink
        .forRowFormat(
          new Path("D:\\LearnWorkSpace\\FlinkDemo\\src\\main\\resources\\file-sink-out"),
          new SimpleStringEncoder[SensorReading]())
        .build())

    env.execute("flink test")
  }
}
Method 2: using FileSink. This requires adding the flink-connector-files dependency to pom.xml:

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-files</artifactId>
    <version>1.14.4</version>
</dependency>
package com.hjt.yxh.hw.apitest

import org.apache.flink.api.common.eventtime.WatermarkStrategy
import org.apache.flink.api.common.serialization.{SimpleStringEncoder, SimpleStringSchema}
import org.apache.flink.connector.file.sink.FileSink
import org.apache.flink.connector.kafka.source.KafkaSource
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

object FileSinkTest {
  def main(args: Array[String]): Unit = {
    // Create the execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Add a source that reads from Kafka; the data is transformed and then written to a file
    val kafkaSource: KafkaSource[String] = KafkaSource.builder[String]()
      .setBootstrapServers("k8s-node3:9092,k8s-node5:9092,k8s-node8:9092")
      .setTopics("mytopic")
      .setValueOnlyDeserializer(new SimpleStringSchema())
      .setGroupId("myGroup")
      .setStartingOffsets(OffsetsInitializer.latest())
      .build()

    val dataStream = env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "kafkaSource")
    val sensorStream = dataStream.map(data => {
      val arry = data.split(",")
      SensorReading(arry(0), arry(1).toLong, arry(2).toDouble)
    })

    val outPath = "D:\\LearnWorkSpace\\FlinkDemo\\src\\main\\resources\\file-sink-out"
    val fileSink = FileSink
      .forRowFormat(new Path(outPath), new SimpleStringEncoder[SensorReading]())
      .build()
    sensorStream.sinkTo(fileSink)

    env.execute("flink test")
  }
}
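FileSink also lets you control when part files are rolled. Below is a minimal sketch of attaching a DefaultRollingPolicy to the row-format builder from the example above (outPath and SensorReading come from that example; the interval and size thresholds are illustrative assumptions, and the long-based setters match the 1.14 API, while newer releases use Duration/MemorySize overloads):

import java.util.concurrent.TimeUnit
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy

// Sketch: roll a new part file every 15 minutes, after 5 minutes of inactivity,
// or once a part file reaches roughly 128 MB. The values are placeholders.
val rollingFileSink = FileSink
  .forRowFormat(new Path(outPath), new SimpleStringEncoder[SensorReading]())
  .withRollingPolicy(
    DefaultRollingPolicy.builder()
      .withRolloverInterval(TimeUnit.MINUTES.toMillis(15))
      .withInactivityInterval(TimeUnit.MINUTES.toMillis(5))
      .withMaxPartSize(128L * 1024 * 1024)
      .build())
  .build()
sensorStream.sinkTo(rollingFileSink)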
Writing the data to Kafka. Add the Maven dependencies: add the following packages to pom.xml:

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.12</artifactId>
    <version>1.14.4</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.12</artifactId>
    <version>1.14.4</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.12</artifactId>
    <version>1.14.4</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.12</artifactId>
    <version>1.14.4</version>
</dependency>
Code:
package com.hjt.yxh.hw.apitest

import org.apache.flink.api.common.eventtime.WatermarkStrategy
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.connector.kafka.sink.{KafkaRecordSerializationSchema, KafkaSink}
import org.apache.flink.connector.kafka.source.KafkaSource
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

object KafkaSinkTest {
  def main(args: Array[String]): Unit = {
    // Create the execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Add a source that reads from Kafka; the data is transformed and then written back to Kafka
    val kafkaSource: KafkaSource[String] = KafkaSource.builder[String]()
      .setBootstrapServers("192.168.0.52:9092,192.168.0.109:9092,192.168.0.115:9092")
      .setTopics("sensor")
      .setValueOnlyDeserializer(new SimpleStringSchema())
      .setGroupId("myGroup")
      .setStartingOffsets(OffsetsInitializer.latest())
      .build()

    val dataStream = env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "kafkaSource")
    val sensorStream = dataStream.map(data => {
      val arry = data.split(",")
      SensorReading(arry(0), arry(1).toLong, arry(2).toDouble).toString
    })

    val kafkaSink = KafkaSink.builder[String]()
      .setBootstrapServers("192.168.0.52:9092,192.168.0.109:9092,192.168.0.115:9092")
      .setRecordSerializer(
        KafkaRecordSerializationSchema.builder()
          .setTopic("mytopic")
          .setValueSerializationSchema(new SimpleStringSchema())
          .build())
      .build()
    sensorStream.sinkTo(kafkaSink)

    env.execute("flink test")
  }
}
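If you need explicit delivery semantics, the KafkaSink builder also accepts a delivery guarantee. A minimal sketch, assuming the same 1.14.4 connector as above; only the setDeliveryGuarantee call is new relative to the example:

import org.apache.flink.connector.base.DeliveryGuarantee

// Sketch: same sink as above, but explicitly requesting at-least-once delivery.
val atLeastOnceKafkaSink = KafkaSink.builder[String]()
  .setBootstrapServers("192.168.0.52:9092,192.168.0.109:9092,192.168.0.115:9092")
  .setRecordSerializer(
    KafkaRecordSerializationSchema.builder()
      .setTopic("mytopic")
      .setValueSerializationSchema(new SimpleStringSchema())
      .build())
  .setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE)
  .build()
sensorStream.sinkTo(atLeastOnceKafkaSink)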
Writing the data to Elasticsearch. Add the Maven dependencies: add the following packages to pom.xml:

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.12</artifactId>
    <version>1.14.4</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.12</artifactId>
    <version>1.14.4</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.12</artifactId>
    <version>1.14.4</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-elasticsearch7_2.12</artifactId>
    <version>1.14.4</version>
</dependency>
package com.hjt.yxh.hw.apitest

import java.util

import org.apache.flink.api.scala._
import org.apache.flink.api.common.functions.RuntimeContext
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.elasticsearch.{ElasticsearchSinkFunction, RequestIndexer}
import org.apache.flink.streaming.connectors.elasticsearch7.ElasticsearchSink
import org.apache.http.HttpHost
import org.elasticsearch.client.Requests
import org.elasticsearch.common.xcontent.XContentType

object EsSinkTest {
  def main(args: Array[String]): Unit = {
    // 1. Create the execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    val inputPath = "D:\\LearnWorkSpace\\FlinkDemo\\src\\main\\resources\\Data\\sensor.txt"
    val dataStream = env.readTextFile(inputPath)
    val sensorReadingStream = dataStream.map(data => {
      val arry = data.split(",")
      SensorReading(arry(0), arry(1).toLong, arry(2).toDouble)
    })

    val httpHosts = new java.util.ArrayList[HttpHost]
    httpHosts.add(new HttpHost("192.168.0.52", 9200, "http"))
    httpHosts.add(new HttpHost("192.168.0.115", 9200, "http"))

    val myEsSinkFunc = new ElasticsearchSinkFunction[SensorReading] {
      override def process(element: SensorReading, ctx: RuntimeContext, indexer: RequestIndexer): Unit = {
        val dataSource = new util.HashMap[String, String]()
        dataSource.put("id", element.id)
        dataSource.put("temperature", element.temperature.toString)
        dataSource.put("timestamp", element.timestamp.toString)

        // Create the index request used to send the document to Elasticsearch
        val indexRequest = Requests.indexRequest()
          .index("sensor")
          .source(dataSource, XContentType.JSON)
        indexer.add(indexRequest)
      }
    }

    val esSinkBuilder = new ElasticsearchSink.Builder[SensorReading](httpHosts, myEsSinkFunc)

    sensorReadingStream.print()
    sensorReadingStream.addSink(esSinkBuilder.build()).setParallelism(2)

    env.execute("elasticsearch Sink Test")
  }
}
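The Elasticsearch sink buffers documents and sends them in bulk, so with only a handful of test records nothing may appear in the index until the buffer fills. A minimal sketch, calling the builder above before build(), that flushes after every record (fine for testing, not for production throughput):

// Sketch: flush each index request immediately instead of batching.
esSinkBuilder.setBulkFlushMaxActions(1)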
Writing the data to Redis. Add the Maven dependencies: add the following packages to pom.xml (the Redis connector is provided by Apache Bahir):

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.12</artifactId>
    <version>1.14.4</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.12</artifactId>
    <version>1.14.4</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.12</artifactId>
    <version>1.14.4</version>
</dependency>
<dependency>
    <groupId>org.apache.bahir</groupId>
    <artifactId>flink-connector-redis_2.11</artifactId>
    <version>1.0</version>
</dependency>
package com.hjt.yxh.hw.apitest

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.redis.RedisSink
import org.apache.flink.streaming.connectors.redis.common.config.{FlinkJedisConfigBase, FlinkJedisPoolConfig}
import org.apache.flink.streaming.connectors.redis.common.mapper.{RedisCommand, RedisCommandDescription, RedisMapper}

object RedisSinkTest {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val inputPath = "D:\\LearnWorkSpace\\FlinkDemo\\src\\main\\resources\\Data\\sensor.txt"
    val dataStream = env.readTextFile(inputPath)
    val sensorStream = dataStream.map(data => {
      val arry = data.split(",")
      SensorReading(arry(0), arry(1).toLong, arry(2).toDouble)
    })

    // Write the data to Redis
    val jedisConfig = new FlinkJedisPoolConfig.Builder()
      .setHost("192.168.0.46")
      .setDatabase(0)
      .build()
    val myRedisSinkFunction = new RedisSink(jedisConfig, new MyRedisMapper)
    sensorStream.addSink(myRedisSinkFunction)

    env.execute("redis sink test")
  }
}

class MyRedisMapper extends RedisMapper[SensorReading] {
  // Store readings with HSET under the hash named DATA_TYPE_SENSOR
  override def getCommandDescription: RedisCommandDescription = {
    new RedisCommandDescription(RedisCommand.HSET, "DATA_TYPE_SENSOR")
  }

  // The hash field is the sensor id
  override def getKeyFromData(data: SensorReading): String = {
    data.id
  }

  // The hash value is the full reading
  override def getValueFromData(data: SensorReading): String = {
    data.toString
  }
}
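Because the mapper uses HSET with the additional key DATA_TYPE_SENSOR, every reading ends up as a field of that single Redis hash, keyed by sensor id. If your Redis instance is not on the default port or requires authentication, the pool config builder accepts those settings too; a hedged sketch (the port and password values are placeholders, not from the original setup):

// Sketch: same pool config as above, with an explicit port and password.
val jedisConfigWithAuth = new FlinkJedisPoolConfig.Builder()
  .setHost("192.168.0.46")
  .setPort(6379)
  .setPassword("your-redis-password")
  .setDatabase(0)
  .build()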
Writing the data to MySQL via JDBC. Add the Maven dependencies: add the following packages to pom.xml:

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.12</artifactId>
    <version>1.14.4</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.12</artifactId>
    <version>1.14.4</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.12</artifactId>
    <version>1.14.4</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-jdbc_2.12</artifactId>
    <version>1.10.3</version>
</dependency>
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.49</version>
</dependency>

Code:
package com.hjt.yxh.hw.apitest

import java.sql.{Connection, DriverManager, PreparedStatement}

import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction
import org.apache.flink.streaming.api.scala._

object JdbcSinkTest {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val inputPath = "D:\\LearnWorkSpace\\FlinkDemo\\src\\main\\resources\\Data\\sensor.txt"
    val dataStream = env.readTextFile(inputPath)
    val sensorStream = dataStream.map(data => {
      val arry = data.split(",")
      SensorReading(arry(0), arry(1).toLong, arry(2).toDouble)
    })

    sensorStream.addSink(new MySqlSink())
    env.execute()
  }
}

class MySqlSink extends RichSinkFunction[SensorReading] {
  // One connection and two prepared statements per parallel subtask
  var conn: Connection = _
  var insertStatement: PreparedStatement = _
  var updateStatement: PreparedStatement = _

  override def open(parameters: Configuration): Unit = {
    conn = DriverManager.getConnection("jdbc:mysql://127.0.0.1:3306/news", "root", "mysqlroot")
    insertStatement = conn.prepareStatement("insert into sensor(sensorId,timestamp,temperature) values(?,?,?)")
    updateStatement = conn.prepareStatement("update sensor set timestamp = ? ,temperature = ? where sensorId = ?")
  }

  // Simple upsert: try an update first, and insert only if no row was updated
  override def invoke(value: SensorReading): Unit = {
    updateStatement.setLong(1, value.timestamp)
    updateStatement.setDouble(2, value.temperature)
    updateStatement.setString(3, value.id)
    updateStatement.execute()
    if (updateStatement.getUpdateCount == 0) {
      insertStatement.setString(1, value.id)
      insertStatement.setLong(2, value.timestamp)
      insertStatement.setDouble(3, value.temperature)
      insertStatement.execute()
    }
  }

  override def close(): Unit = {
    insertStatement.close()
    updateStatement.close()
    conn.close()
  }
}
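The sink above assumes a sensor table already exists in the news database. A minimal sketch of creating such a table, with the column names taken from the prepared statements and the column types inferred from the setters (the exact types and key choice are assumptions):

import java.sql.DriverManager

// Sketch: create the assumed target table; sensorId is treated as the key
// because the update statement matches rows on it.
val setupConn = DriverManager.getConnection("jdbc:mysql://127.0.0.1:3306/news", "root", "mysqlroot")
val stmt = setupConn.createStatement()
stmt.execute(
  """CREATE TABLE IF NOT EXISTS sensor (
    |  sensorId VARCHAR(64) PRIMARY KEY,
    |  `timestamp` BIGINT,
    |  temperature DOUBLE
    |)""".stripMargin)
stmt.close()
setupConn.close()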