import java.io.{ByteArrayOutputStream, ObjectOutputStream}
import org.apache.avro.Schema.Parser

// Parse the Avro schema from its JSON definition (schemaString holds the schema JSON)
val schema = new Parser().parse(schemaString)

// Serialize the Schema into a byte array via Java serialization
val baos = new ByteArrayOutputStream()
val oos = new ObjectOutputStream(baos)
oos.writeObject(schema)
oos.flush()
val schemaBytes = baos.toByteArray
oos.close()
import java.io.{ByteArrayInputStream, ObjectInputStream}
import org.apache.avro.Schema

// Deserialize the byte array back into a Schema object
val bais = new ByteArrayInputStream(schemaBytes)
val ois = new ObjectInputStream(bais)
val schema = ois.readObject().asInstanceOf[Schema]
ois.close()
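
// Note (a minimal alternative sketch, not from the original snippet): Schema.toString()
// yields the schema's JSON text, so the schema can also be shipped as a plain String and
// re-parsed with Schema.Parser, avoiding Java serialization entirely.
val schemaJson: String = schema.toString
val restoredSchema: Schema = new Schema.Parser().parse(schemaJson)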
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericData, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.EncoderFactory
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.header.internals.RecordHeader
import org.apache.spark.sql.{ForeachWriter, Row}

class KafkaAvroForeachWriter(schema: Schema, kafkaConfig: Map[String, String], kafkaTopic: String) extends ForeachWriter[Row] {

  var kafkaProducer: KafkaProducer[String, Array[Byte]] = _
  // Java-serialized form of the schema, attached to every outgoing record as a Kafka header
  var avroSchemaBytes: Array[Byte] = _

  override def open(partitionId: Long, version: Long): Boolean = {
    // KafkaProducer expects java.util.Properties; the key/value serializers
    // (StringSerializer / ByteArraySerializer) are expected to be supplied in kafkaConfig
    val props = new java.util.Properties()
    kafkaConfig.foreach { case (k, v) => props.put(k, v) }
    kafkaProducer = new KafkaProducer[String, Array[Byte]](props)

    // Serialize the schema once per partition; it does not depend on individual rows
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(schema)
    oos.flush()
    avroSchemaBytes = baos.toByteArray
    oos.close()
    true
  }
  override def process(row: Row): Unit = {
    // Build an Avro record from the row (only the first column is copied here)
    val record = new GenericData.Record(schema)
    record.put(0, row.getString(0))

    // Encode the record to Avro binary
    val avroWriter = new GenericDatumWriter[GenericRecord](schema)
    val out = new ByteArrayOutputStream()
    val encoder = EncoderFactory.get().binaryEncoder(out, null)
    avroWriter.write(record, encoder)
    encoder.flush()
    out.close()
    val value = out.toByteArray

    // Send the Avro payload as the record value; the serialized schema travels with it
    // in a record header (the header name "avro.schema" is arbitrary)
    val headers = java.util.Collections.singletonList[org.apache.kafka.common.header.Header](
      new RecordHeader("avro.schema", avroSchemaBytes))
    kafkaProducer.send(new ProducerRecord[String, Array[Byte]](kafkaTopic, null, null, value, headers)).get()
  }
  override def close(errorOrNull: Throwable): Unit = {
    kafkaProducer.close()
  }
  // Counterpart of the schema serialization above: rebuilds a Schema from its
  // Java-serialized bytes (useful on the consumer side, or if the writer were
  // constructed from schema bytes rather than a Schema instance)
  private def deserializeSchema(bytes: Array[Byte]): Schema = {
    val bais = new ByteArrayInputStream(bytes)
    val ois = new ObjectInputStream(bais)
    val schema = ois.readObject().asInstanceOf[Schema]
    ois.close()
    bais.close()
    schema
  }
}
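
// A minimal usage sketch (assumptions: `df` is a streaming DataFrame whose first column is
// a String; the broker address and topic name below are placeholders). The writer is
// attached to the streaming query via writeStream.foreach(...).
val kafkaConfig = Map(
  "bootstrap.servers" -> "localhost:9092",
  "key.serializer"    -> "org.apache.kafka.common.serialization.StringSerializer",
  "value.serializer"  -> "org.apache.kafka.common.serialization.ByteArraySerializer"
)

val query = df.writeStream
  .foreach(new KafkaAvroForeachWriter(schema, kafkaConfig, "avro-topic"))
  .start()

query.awaitTermination()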