1. Add the dependencies to the pom file:
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-java</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.11</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-java_2.11</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.11</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.11</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-wikiedits_2.11</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka-0.10_2.11</artifactId>
    <version>${flink.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka_2.11</artifactId>
    <version>0.10.2.0</version>
    <exclusions>
        <exclusion>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
        </exclusion>
        <exclusion>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
        </exclusion>
    </exclusions>
</dependency>
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-simple</artifactId>
    <version>1.7.25</version>
    <scope>compile</scope>
</dependency>
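The dependencies above reference ${flink.version} without defining it. A minimal properties block could look like the sketch below; the version number is an assumption (any Flink release that still ships flink-connector-kafka-0.10_2.11 and the KafkaDeserializationSchema interface, i.e. 1.8+, should work), so match it to your cluster:

<properties>
    <!-- Assumed version: pick one that matches your cluster and still provides
         flink-connector-kafka-0.10_2.11 and KafkaDeserializationSchema -->
    <flink.version>1.9.1</flink.version>
</properties>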
2. Code
import java.util.Properties

import com.lzw.example.utils.ServiceConf
import com.lzw.example.utils.serialization.RecordKafkaSchema
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.flink.api.java.tuple.Tuple2

object KafkaCount {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.createLocalEnvironment()
    // Load the Kafka client settings from kafka.properties on the classpath
    val kafka: Properties = ServiceConf.getRes("/kafka.properties")
    // val schema = new TypeInformationKeyValueSerializationSchema(classOf[String], classOf[String], env.getConfig)
    // val schema = new SimpleStringSchema()
    // val schema = new CustomKafkaSchema
    val schema = new RecordKafkaSchema
    // The consumer emits full ConsumerRecord[String, String] elements, so Kafka metadata is preserved
    val kafkaConsumer = new FlinkKafkaConsumer010[ConsumerRecord[String, String]]("sgz_bi_player3_2", schema, kafka)
    kafkaConsumer.setStartFromEarliest()
    val value = env.addSource(kafkaConsumer)
    value.print()
    env.execute()
  }
}
Contents of kafka.properties:
bootstrap.servers=bigdata151:9092,bigdata152:9092,bigdata153:9092,bigdata127:9092,bigdata128:9092,bigdata129:9092
#zookeeper.connect=bigdata153:2181,bigdata152:2181,bigdata151:2181
group.id=flink
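ServiceConf.getRes is a project-specific helper that is not shown in this post; a hypothetical sketch of what it presumably does, namely loading a classpath resource into a java.util.Properties, is:

import java.io.InputStream
import java.util.Properties

// Hypothetical sketch of the ServiceConf helper referenced above (the real implementation is not shown):
// it loads a properties file from the classpath, e.g. "/kafka.properties".
object ServiceConf {
  def getRes(path: String): Properties = {
    val props = new Properties()
    val in: InputStream = getClass.getResourceAsStream(path)
    require(in != null, s"resource not found on classpath: $path")
    try props.load(in) finally in.close()
    props
  }
}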
Problem: using the deserialization schemas that Flink already provides
1. SimpleStringSchema: the result contains only the Kafka value and no other information:
val schema =new SimpleStringSchema()
val kafkaConsumer = new FlinkKafkaConsumer010[String]("sgz_bi_player3_2", schema, kafka)
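As a small illustration (a sketch continuing the KafkaCount example above, reusing its env and kafka properties), each stream element is just a plain String, so there is no topic, partition or offset left to work with downstream:

// Each element is only the record value as a String; Kafka metadata is gone.
env.addSource(kafkaConsumer)
  .map(v => s"value only: $v")
  .print()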
2. TypeInformationKeyValueSerializationSchema: the result contains only the Kafka key and value, and no other information:
val schema = new TypeInformationKeyValueSerializationSchema(classOf[String], classOf[String], env.getConfig)
val kafkaConsumer = new FlinkKafkaConsumer010[Tuple2[String,String]]("sgz_bi_player3_2", schema, kafka)
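Here each element is a Flink Java Tuple2 with f0 = key and f1 = value; note that TypeInformationKeyValueSerializationSchema decodes the bytes with Flink's own TypeSerializers, so it is intended for records that were written with the same schema. A sketch continuing the example above:

// f0 is the key, f1 is the value; still no topic/partition/offset metadata.
env.addSource(kafkaConsumer)
  .map(kv => s"key=${kv.f0}, value=${kv.f1}")
  .print()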
3. Quite often we need the Kafka topic or other metadata; in that case we implement the KafkaDeserializationSchema interface ourselves to customize the structure of the returned data:
val schema = new RecordKafkaSchema
val kafkaConsumer = new FlinkKafkaConsumer010[ConsumerRecord[String, String]]("sgz_bi_player3_2", schema, kafka)
RecordKafkaSchema.scala:
import org.apache.flink.api.common.typeinfo.{TypeHint, TypeInformation}
import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema
import org.apache.kafka.clients.consumer.ConsumerRecord
/**
* @Author LZW
* @Date 2020/1/17 16:30
**/
class RecordKafkaSchema extends KafkaDeserializationSchema[ConsumerRecord[String, String]] {

  // The stream never ends based on the content of an element
  override def isEndOfStream(nextElement: ConsumerRecord[String, String]): Boolean = false

  // Convert the raw byte-array record into a ConsumerRecord[String, String],
  // keeping all Kafka metadata (topic, partition, offset, timestamp, ...)
  override def deserialize(record: ConsumerRecord[Array[Byte], Array[Byte]]): ConsumerRecord[String, String] = {
    var key: String = null
    var value: String = null
    if (record.key() != null) {
      key = new String(record.key())
    }
    if (record.value() != null) {
      value = new String(record.value())
    }
    new ConsumerRecord[String, String](
      record.topic(),
      record.partition(),
      record.offset(),
      record.timestamp(),
      record.timestampType(),
      record.checksum(),
      record.serializedKeySize(),
      record.serializedValueSize(),
      key,
      value)
  }

  override def getProducedType: TypeInformation[ConsumerRecord[String, String]] =
    TypeInformation.of(new TypeHint[ConsumerRecord[String, String]] {})
}
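With this schema in place, downstream operators can read the Kafka metadata directly from each ConsumerRecord; a short sketch continuing the KafkaCount job above:

// Topic, partition and offset survive into the Flink job alongside key and value.
env.addSource(kafkaConsumer)
  .map(r => s"topic=${r.topic()} partition=${r.partition()} offset=${r.offset()} key=${r.key()} value=${r.value()}")
  .print()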