package com.hupu.dace.spark.streaming

import java.util.Properties

import com.hupu.dace.hbaserestful.util.HdfsUtil
import DaceFunctions._
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

/**
 * Replays two hourly `rd_e_pal` log files from HDFS into a Kafka topic
 * using the new (0.8.2+) Java producer API.
 *
 * Usage: PalPVUVProducer &lt;brokerList&gt; &lt;topic&gt; [key=value ...]
 * Any trailing key=value arguments are passed through as extra producer
 * properties.
 *
 * Created by xiaojun on 2015/5/20.
 */
object PalPVUVProducer {

  /**
   * A log line is kept when it has at least 31 \u0001-separated fields,
   * field 4 is numeric, field 5 filters to "i", and field 11 is present
   * (neither the Hive null marker "\N" nor null after filtering).
   * NOTE(review): `filter` comes from DaceFunctions — assumed to be a
   * field-normalizing helper; confirm its semantics in that object.
   */
  private def isValidRecord(line: String): Boolean = {
    val l = line.split("\001")
    l.length >= 31 &&
      l(4).matches("""\d+""") &&
      filter(l(5)) == "i" &&
      filter(l(11)) != "\\N" &&
      filter(l(11)) != null
  }

  /** Reads one HDFS file as text and keeps only valid records. */
  private def loadLines(path: String): Array[String] =
    HdfsUtil.getString(path).split("\n").filter(isValidRecord)

  def main(args: Array[String]): Unit = {
    if (args.length < 2) {
      System.err.println("Usage: PVUVProducer <metadataBrokerList> <topic> ")
      System.exit(1)
    }
    val Array(brokers, topicName, _*) = args

    // Extra producer properties may follow the two positional arguments
    // as key=value pairs.
    val props = new Properties()
    (2 until args.length).foreach { i =>
      val pieces = args(i).split("=")
      if (pieces.length != 2)
        throw new IllegalArgumentException("Invalid property: " + args(i))
      props.put(pieces(0), pieces(1))
    }

    // BUG FIX: the new KafkaProducer ignores the legacy Scala-producer keys
    // "metadata.broker.list" / "serializer.class" and *requires*
    // bootstrap.servers — without it construction throws a ConfigException.
    // The original code set only the legacy keys and had bootstrap.servers
    // commented out.
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.ByteArraySerializer")
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.ByteArraySerializer")

    val producer: KafkaProducer[Array[Byte], Array[Byte]] =
      new KafkaProducer[Array[Byte], Array[Byte]](props)

    val lines =
      loadLines("/user/hive/warehouse/rd_e_pal/dt=20150312/hr=03/rd_e_pal.20150312.03-230")
    val lines2 =
      loadLines("/user/hive/warehouse/rd_e_pal/dt=20150312/hr=03/rd_e_pal.20150312.03-231")

    val start = System.currentTimeMillis()
    (1 to 1).foreach { _ =>
      // Same order as the original: file -230 first, then -231.
      (lines ++ lines2).foreach { line =>
        // send() is asynchronous; the returned Future is intentionally
        // ignored — close() below flushes anything still buffered.
        producer.send(
          new ProducerRecord[Array[Byte], Array[Byte]](topicName, line.getBytes()))
      }
    }
    println("---------cost:" + (System.currentTimeMillis() - start))
    producer.close()
  }
}
Kafka 0.8.2.1 新版 Producer API 是异步写入的，效率非常高。
参数传递: &lt;brokerList&gt; &lt;topic&gt; [key=value ...]（前两个为必填位置参数，其余 key=value 会作为额外的 producer 配置传入）