Flink Consuming Kafka: Writing a Custom KafkaDeserializationSchema

I. Add the dependencies to the pom file:

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-scala_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-wikiedits_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.10_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
            <version>0.10.2.0</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-simple</artifactId>
            <version>1.7.25</version>
            <scope>compile</scope>
        </dependency>
    </dependencies>

II. The code

    import java.util.Properties

    import com.lzw.example.utils.ServiceConf
    import com.lzw.example.utils.serialization.RecordKafkaSchema
    import org.apache.flink.api.scala._
    import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
    import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
    import org.apache.kafka.clients.consumer.ConsumerRecord

    object KafkaCount {
      def main(args: Array[String]): Unit = {
        val env = StreamExecutionEnvironment.createLocalEnvironment()
        // Load the Kafka client settings from kafka.properties on the classpath
        val kafka: Properties = ServiceConf.getRes("/kafka.properties")
        // Alternative schemas (discussed below):
        // val schema = new TypeInformationKeyValueSerializationSchema(classOf[String], classOf[String], env.getConfig)
        // val schema = new SimpleStringSchema()
        // val schema = new CustomKafkaSchema
        val schema = new RecordKafkaSchema
        val kafkaConsumer = new FlinkKafkaConsumer010[ConsumerRecord[String, String]]("sgz_bi_player3_2", schema, kafka)
        kafkaConsumer.setStartFromEarliest()
        val value = env.addSource(kafkaConsumer)
        value.print()
        env.execute()
      }
    }

The contents of kafka.properties:

    bootstrap.servers=bigdata151:9092,bigdata152:9092,bigdata153:9092,bigdata127:9092,bigdata128:9092,bigdata129:9092
    #zookeeper.connect=bigdata153:2181,bigdata152:2181,bigdata151:2181
    group.id=flink
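
ServiceConf.getRes is a project-local utility that is not shown here. As a minimal sketch, assuming it does nothing more than load a .properties file from the classpath, such a helper might look like this (the implementation below is an assumption, not the original code):

    import java.util.Properties

    // Hypothetical sketch of the ServiceConf helper used in KafkaCount above.
    // Assumption: it loads a .properties file from the classpath into a Properties object.
    object ServiceConf {
      def getRes(path: String): Properties = {
        val props = new Properties()
        val in = getClass.getResourceAsStream(path)
        if (in != null) {
          try props.load(in)
          finally in.close()
        }
        props
      }
    }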

The problem with Flink's built-in deserialization schemas

1. SimpleStringSchema: the result contains only the Kafka value, with no other information:

    val schema = new SimpleStringSchema()
    val kafkaConsumer = new FlinkKafkaConsumer010[String]("sgz_bi_player3_2", schema, kafka)
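
For reference, a minimal sketch of what consuming this stream looks like (reusing env from the example above):

    // The element type is plain String: only the record value survives deserialization,
    // so topic, partition, offset, and key are not available downstream.
    val values = env.addSource(kafkaConsumer) // DataStream[String]
    values.print()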

2. TypeInformationKeyValueSerializationSchema: the result contains only the Kafka key and value, with no other information:

    val schema = new TypeInformationKeyValueSerializationSchema(classOf[String], classOf[String], env.getConfig)
    val kafkaConsumer = new FlinkKafkaConsumer010[Tuple2[String,String]]("sgz_bi_player3_2", schema, kafka)
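
A short sketch of consuming this key/value stream; note that the element type is Flink's Java Tuple2, hence the f0/f1 field accessors (this assumes the imports from the KafkaCount example, including org.apache.flink.api.scala._):

    import org.apache.flink.api.java.tuple.Tuple2

    // Each element is a Java Tuple2: f0 is the key, f1 is the value.
    val pairs = env.addSource(kafkaConsumer) // DataStream[Tuple2[String, String]]
    pairs.map(t => s"key=${t.f0}, value=${t.f1}").print()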

3. Often we need the Kafka topic or other metadata as well; in that case we implement the KafkaDeserializationSchema interface ourselves to customize the structure of the returned data:

    val schema = new RecordKafkaSchema
    val kafkaConsumer = new FlinkKafkaConsumer010[ConsumerRecord[String, String]]("sgz_bi_player3_2", schema, kafka)
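
Downstream operators can then read the full record metadata. A minimal sketch, reusing the consumer defined above:

    // Every element is a complete ConsumerRecord, so topic, partition, and offset
    // are all available alongside the key and value.
    env.addSource(kafkaConsumer)
      .map(r => s"topic=${r.topic()} partition=${r.partition()} offset=${r.offset()} value=${r.value()}")
      .print()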

RecordKafkaSchema.scala:


    import java.nio.charset.StandardCharsets

    import org.apache.flink.api.common.typeinfo.{TypeHint, TypeInformation}
    import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema
    import org.apache.kafka.clients.consumer.ConsumerRecord

    /**
     * @Author LZW
     * @Date 2020/1/17 16:30
     **/
    class RecordKafkaSchema extends KafkaDeserializationSchema[ConsumerRecord[String, String]] {

      // The Kafka stream is unbounded; never signal end-of-stream.
      override def isEndOfStream(nextElement: ConsumerRecord[String, String]): Boolean = false

      // Decode the raw byte-array record into a String-typed ConsumerRecord,
      // carrying over all of the original metadata (topic, partition, offset, ...).
      override def deserialize(record: ConsumerRecord[Array[Byte], Array[Byte]]): ConsumerRecord[String, String] = {
        var key: String = null
        var value: String = null
        if (record.key != null) {
          // Decode as UTF-8 explicitly to avoid platform-default charset surprises
          key = new String(record.key(), StandardCharsets.UTF_8)
        }
        if (record.value != null) {
          value = new String(record.value(), StandardCharsets.UTF_8)
        }
        new ConsumerRecord[String, String](
          record.topic(),
          record.partition(),
          record.offset(),
          record.timestamp(),
          record.timestampType(),
          record.checksum(),
          record.serializedKeySize(),
          record.serializedValueSize(),
          key,
          value)
      }

      override def getProducedType: TypeInformation[ConsumerRecord[String, String]] =
        TypeInformation.of(new TypeHint[ConsumerRecord[String, String]] {})
    }
