Serializing an Avro Schema to Binary

Avro's Schema can be round-tripped through standard Java serialization (this requires Avro 1.9+, where Schema implements java.io.Serializable):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
import org.apache.avro.Schema
import org.apache.avro.Schema.Parser

// schemaString holds the Avro schema definition as a JSON string
val schema = new Parser().parse(schemaString)

// Serialize the schema into a byte array
val baos = new ByteArrayOutputStream()
val oos = new ObjectOutputStream(baos)
oos.writeObject(schema)
oos.flush()
val schemaBytes = baos.toByteArray
oos.close()

// Deserialize the byte array back into a Schema object
val bais = new ByteArrayInputStream(schemaBytes)
val ois = new ObjectInputStream(bais)
val deserializedSchema = ois.readObject().asInstanceOf[Schema]
ois.close()
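
Java serialization ties the byte format to the Avro library version on both ends. A more portable sketch, assuming only that both sides have Avro on the classpath, ships the schema's JSON text instead (variable names here are illustrative):

import java.nio.charset.StandardCharsets
import org.apache.avro.Schema

// Schema.toString returns the schema's JSON representation, which
// Schema.Parser can re-parse regardless of Avro version
val jsonBytes = schema.toString.getBytes(StandardCharsets.UTF_8)
val restoredSchema = new Schema.Parser().parse(new String(jsonBytes, StandardCharsets.UTF_8))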


The same technique wired into a Spark Structured Streaming ForeachWriter that publishes each row as an Avro-encoded Kafka message, with the serialized schema attached so consumers can decode it:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
import java.util.Properties

import org.apache.avro.Schema
import org.apache.avro.generic.{GenericData, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.EncoderFactory
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.spark.sql.{ForeachWriter, Row}

class KafkaAvroForeachWriter(schema: Schema, kafkaConfig: Map[String, String], kafkaTopic: String) extends ForeachWriter[Row] {

  var kafkaProducer: KafkaProducer[String, Array[Byte]] = _

  // Serialize the schema once per writer instance rather than once per row
  private lazy val schemaBytes: Array[Byte] = {
    val outputStream = new ByteArrayOutputStream()
    val objectOutputStream = new ObjectOutputStream(outputStream)
    objectOutputStream.writeObject(schema)
    objectOutputStream.flush()
    objectOutputStream.close()
    outputStream.toByteArray
  }

  override def open(partitionId: Long, version: Long): Boolean = {
    // KafkaProducer expects java.util.Properties (or a Java Map), not a Scala Map
    val props = new Properties()
    kafkaConfig.foreach { case (k, v) => props.put(k, v) }
    kafkaProducer = new KafkaProducer[String, Array[Byte]](props)
    true
  }

  override def process(row: Row): Unit = {
    // Put the row's first column into the schema's first field
    val record = new GenericData.Record(schema)
    record.put(0, row.getString(0))

    // Encode the record as Avro binary; GenericDatumWriter matches GenericRecord
    val avroWriter = new GenericDatumWriter[GenericRecord](schema)
    val out = new ByteArrayOutputStream()
    val encoder = EncoderFactory.get().binaryEncoder(out, null)
    avroWriter.write(record, encoder)
    encoder.flush()
    out.close()
    val value = out.toByteArray

    // Attach the serialized schema as a record header and block until acknowledged
    val producerRecord = new ProducerRecord[String, Array[Byte]](kafkaTopic, null, value)
    producerRecord.headers().add("avro.schema", schemaBytes)
    kafkaProducer.send(producerRecord).get()
  }

  override def close(errorOrNull: Throwable): Unit = {
    if (kafkaProducer != null) kafkaProducer.close()
  }

  // Inverse helper (unused by the writer itself): how the schema header bytes
  // can be turned back into a Schema on the consuming side
  private def deserializeSchema(bytes: Array[Byte]): Schema = {
    val bais = new ByteArrayInputStream(bytes)
    val ois = new ObjectInputStream(bais)
    val schema = ois.readObject().asInstanceOf[Schema]
    ois.close()
    schema
  }
}
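
A minimal usage sketch, assuming a streaming DataFrame df with a single string column; the broker address, serializer settings, and topic name below are placeholders:

// Hypothetical wiring of the writer into a Structured Streaming query
val kafkaConfig = Map(
  "bootstrap.servers" -> "localhost:9092",
  "key.serializer"    -> "org.apache.kafka.common.serialization.StringSerializer",
  "value.serializer"  -> "org.apache.kafka.common.serialization.ByteArraySerializer"
)

val query = df.writeStream
  .foreach(new KafkaAvroForeachWriter(schema, kafkaConfig, "avro-events"))
  .start()

query.awaitTermination()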

 
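On the consuming side, the payload can be decoded against the schema recovered from the "avro.schema" header. A sketch, assuming the same Avro 1.9+ Java-serialization format used above; the decode helper is hypothetical:

import java.io.{ByteArrayInputStream, ObjectInputStream}
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.avro.io.DecoderFactory

// Hypothetical helper: decode one Kafka message value using the schema header bytes
def decode(payload: Array[Byte], schemaHeader: Array[Byte]): GenericRecord = {
  // Recover the Schema shipped in the "avro.schema" header
  val ois = new ObjectInputStream(new ByteArrayInputStream(schemaHeader))
  val schema = ois.readObject().asInstanceOf[Schema]
  ois.close()
  // Decode the Avro-binary payload against that schema
  val reader = new GenericDatumReader[GenericRecord](schema)
  val decoder = DecoderFactory.get().binaryDecoder(payload, null)
  reader.read(null, decoder)
}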
