Kafka production and consumption, implemented with Akka

The Application builds a coordinator actor. Based on the configured mode, the coordinator creates the corresponding producer and consumer actors and adds them to a write queue and a read queue. During initialization each producer/consumer actor sends a registration message containing its own reference back to the coordinator. When the coordinator receives that message it starts a one-shot timer; once the scheduled time elapses it sends a command that terminates that actor. When both the write queue and the read queue are empty, the system is shut down.
KafkaStream blocks its thread, so the consumption loop has to run on a separate thread pool; everything else is straightforward.
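A minimal sketch of that idea (names and pool size are illustrative; the real implementation is NumberConsumerByKafkaStream further down): the blocking iteration is pushed onto a dedicated fixed-size pool wrapped in an ExecutionContext, so it never ties up the actor dispatcher.

import java.util.concurrent.Executors
import scala.concurrent.{ExecutionContext, Future}

object BlockingConsumeSketch {
  // Dedicated pool so the blocking iteration never occupies the actor dispatcher.
  private val blockingPool = Executors.newFixedThreadPool(10)
  implicit val blockingEc: ExecutionContext = ExecutionContext.fromExecutor(blockingPool)

  // Stand-in for kafkaStream.iterator(): each step may block until a message arrives.
  def consumeBlocking(messages: Iterator[String]): Future[Unit] = Future {
    messages.foreach(println)
  }
}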
Configuration is handled with Typesafe Config, logging with Logback.
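For reference, a sketch of the sbt dependencies this code appears to rely on; the exact artifacts and versions are my assumptions and should be adjusted to the actual build:

// build.sbt (illustrative; versions are assumptions)
libraryDependencies ++= Seq(
  "com.typesafe.akka"      %% "akka-actor"      % "2.4.1",
  "com.typesafe.akka"      %% "akka-slf4j"      % "2.4.1",
  // plus the akka-stream artifact matching your Akka version, for the ActorMaterializer in Coordinator
  "org.apache.kafka"       %% "kafka"           % "0.9.0.0", // old high-level consumer (KafkaStream)
  "org.apache.kafka"        % "kafka-clients"   % "0.9.0.0", // new KafkaConsumer / KafkaProducer
  "org.scala-lang.modules" %% "scala-async"     % "0.9.5",
  "com.typesafe"            % "config"          % "1.3.0",
  "ch.qos.logback"          % "logback-classic" % "1.1.3"
)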
application.conf
akka {
  loggers = ["akka.event.slf4j.Slf4jLogger"]
  loglevel = "INFO"
  logging-filter = "akka.event.slf4j.Slf4jLoggingFilter"
  log-dead-letters = 0
}

common {
  numMessage = "20000"   # default number of messages to send
  scheduler.time = "20"  # default scheduling time (seconds)
  mode = "readwrite"     # mode: one of write, read and readwrite
  threadNum = "10"       # size of the thread pool, mainly to isolate the blocking kafka consumer
  actor = "10"
  timeout = "100"
}
consumer {
  bootstrap.servers = "ctao-machine:9092"
  group.id = "mytest"
  zookeeper.connect = "ctao-machine:2181"
  host = "ctao-machine"
  port = "2181"
  bufferSize = "100"
  clientId = "typesafe"
  topic = "testctao"
  zookeeper.sync.time.ms = "200"
  auto.commit.interval.ms = "1000"
  zookeeper.session.timeout.ms = "5000"
  zookeeper.connection.timeout.ms = "10000"
  rebalance.backoff.ms = "2000"
  rebalance.max.retries = "10"
  key.deserializer = "org.apache.kafka.common.serialization.StringDeserializer"
  value.deserializer = "com.linewell.akkakafka.common.deserializer.LongDeserializer"
}

producer {
  metadata.broker.list = "ctao-machine:9092"
  key.serializer = "org.apache.kafka.common.serialization.StringSerializer"
  value.serializer = "com.linewell.akkakafka.common.serializer.LongSerializer"
  bootstrap.servers = "ctao-machine:9092"
}
Logging configuration (logback.xml):
<configuration>
  <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
    <encoder>
      <pattern>%d{HH:mm:ss.SSS} %-5level %logger{36} - %msg%n%rEx</pattern>
    </encoder>
  </appender>

  <logger name="org.apache" level="WARN"/>
  <logger name="kafka" level="OFF"/>
  <logger name="kafka.network.Processor" level="OFF"/> <!-- To silence expected IOExceptions on consumer shutdown -->
  <logger name="org.apache.zookeeper.jmx" level="ERROR"/> <!-- To silence expected JMX errors on ZK shutdown -->
  <logger name="kafka.server.KafkaApis" level="OFF"/> <!-- To silence expected occasional AdminOperationException on startup -->
  <logger name="kafka.producer.async.DefaultEventHandler" level="OFF"/> <!-- To silence expected occasional 'Failed to collate messages by topic' on startup -->
  <logger name="org.I0Itec" level="WARN"/>
  <logger name="com.linewell" level="DEBUG"/>

  <root level="ERROR">
    <appender-ref ref="STDOUT"/>
  </root>
</configuration>
The modes are read, write, and read-write:
package com.linewell.akkakafka.kafka.bean

/** Created by ctao on 16-1-23. Mode. */
sealed trait Mode

object Mode {
  /** Read mode. */
  case object Read extends Mode

  /** Write mode. */
  case object Write extends Mode

  /** Mixed read-write mode. */
  case object Readwrite extends Mode
}
package com.linewell.akkakafka.kafka.bean

import akka.actor.ActorRef

/** Created by ctao on 16-1-23. Command trait. */
sealed trait Command

object Command {
  /** A read-mode actor has finished initializing. @param actorRef the actor's reference */
  case class ReadInitialized(actorRef: ActorRef) extends Command

  /** A write-mode actor has finished initializing. @param actorRef the actor's reference */
  case class WriteInitialized(actorRef: ActorRef) extends Command

  /** Stop an actor. @param actorRef the actor's reference */
  case class Stop(actorRef: ActorRef) extends Command

  /** Shut the system down. */
  case object Shutdown extends Command

  /** Start the job. @param num initial number of actors */
  case class StartNumber(num: Int) extends Command

  /** Start consuming. */
  case object StartConsume extends Command

  /** Start producing. */
  case object StartProduce extends Command
}
First, the consumer implemented with the Kafka client:
package com.linewell.akkakafka.kafka.consume

import akka.actor.SupervisorStrategy.Escalate
import akka.actor._
import com.linewell.akkakafka.common.util.KafkaConsumerConfig
import com.linewell.akkakafka.kafka.bean.Command.{ReadInitialized, StartConsume}
import com.typesafe.config.ConfigFactory
import org.apache.kafka.clients.consumer.KafkaConsumer

import scala.collection.JavaConversions._

/** Created by ctao on 16-1-27. Consumer implemented with the KafkaConsumer client. */
class NumberConsumerByKafkaClient extends Actor with ActorLogging with NumberConsume {
  /** The consumer. */
  private var consumer: KafkaConsumer[String, Long] = _
  private lazy val conf = ConfigFactory.load()
  private val topic = conf.getString("consumer.topic")
  private val timeOut = conf.getLong("common.timeout")

  /** Called before the actor starts. */
  override def preStart(): Unit = {
    initConsumer()
    context.parent ! ReadInitialized(self)
    self ! StartConsume
  }

  /** On StartConsume, perform the consume action. */
  override def receive: Receive = {
    case StartConsume ⇒ consume(timeOut)
  }

  /** Called before the actor stops. @throws java.lang.Exception on failure */
  @throws[Exception](classOf[Exception])
  override def postStop(): Unit = {
    log.debug("stopping all consumer")
    // Unsubscribe, then close.
    consumer.unsubscribe()
    consumer.close()
    log.debug("stop all consumer")
  }

  /** Consume method. @param timeOut poll timeout */
  private def consume(timeOut: Long): Unit = {
    consumer.poll(timeOut).foreach { record ⇒
      log.info(s"${self.path.toString} receive ${record.key} value ${record.value} " +
        s"offset ${record.offset} partition ${record.partition} topic ${record.topic}")
    }
    consumer.commitAsync()
  }

  /** The actor's supervision strategy. */
  override def supervisorStrategy: SupervisorStrategy = OneForOneStrategy() {
    case e: Exception ⇒
      // handle failing kafka
      log.error(s"Read failed $e")
      Escalate
  }

  /** Initialize the consumer and subscribe to the topic. */
  private def initConsumer() = {
    log.debug(s"Config ${KafkaConsumerConfig()}")
    consumer = new KafkaConsumer[String, Long](KafkaConsumerConfig())
    consumer.subscribe(Vector(topic))
  }
}

object NumberConsumerByKafkaClient {
  /** Props for the consumer actor. @return Props wrapping the actor */
  def props: Props = Props(new NumberConsumerByKafkaClient)
}
The other implementation uses the blocking KafkaStream:
package com.linewell.akkakafka.kafka.consume

import java.util.concurrent.Executors

import akka.actor.SupervisorStrategy.Escalate
import akka.actor._
import com.linewell.akkakafka.common.deserializer.{Decoder, LongDeserializer}
import com.linewell.akkakafka.common.util.KafkaConsumerConfig
import com.linewell.akkakafka.kafka.bean.Command._
import com.linewell.akkakafka.kafka.consume.NumberConsumerByKafkaStream.Consume
import com.typesafe.config.ConfigFactory
import kafka.consumer.{Consumer, ConsumerConfig, ConsumerConnector, KafkaStream}
import kafka.message.MessageAndMetadata
import org.apache.kafka.common.serialization.StringDeserializer

import scala.async.Async._
import scala.concurrent.{ExecutionContext, ExecutionContextExecutor}
import scala.util.{Failure, Success, Try}

/** Created by ctao on 16-1-23. Consumer implemented with KafkaStream, which blocks. */
class NumberConsumerByKafkaStream extends Actor with ActorLogging {
  val conf = ConfigFactory.load()

  /** The consumer connector. */
  private var consumer: Try[ConsumerConnector] = _

  /** Thread-pool size; the pool is dedicated to kafka consumption so the blocking stays out of the dispatcher. */
  val threadNum = conf.getInt("common.threadNum")
  private val executor = Executors.newFixedThreadPool(threadNum)
  val topic = conf.getString("consumer.topic")

  /** The KafkaStreams; cleaned before shutdown. */
  private var streams: Option[List[KafkaStream[String, Long]]] = None

  override def receive: Receive = {
    // On StartConsume, create the streams and hand each one to ourselves.
    case StartConsume ⇒ consumer.foreach { (consumer: ConsumerConnector) ⇒
      val consumerStreams = consumer.createMessageStreams(Map(topic → 1),
        Decoder(topic, new StringDeserializer),
        Decoder(topic, new LongDeserializer))
      streams = Option(consumerStreams(topic))
      if (streams.isDefined) {
        log.info(s"Got streams ${streams.get.length} $streams")
        streams.get.foreach { kafkaStream ⇒
          self ! Consume(kafkaStream)
        }
      }
    }

    // On a stream, run the blocking iteration on the dedicated pool.
    case Consume(kafkaStream) ⇒
      log.info(s"Handling KafkaStream ${kafkaStream.clientId}")
      implicit val executionContextExecutor: ExecutionContextExecutor = ExecutionContext.fromExecutor(executor)
      async {
        kafkaStream.iterator().foreach {
          case msg: MessageAndMetadata[String, Long] ⇒
            log.info(s"${self.path.toString} : kafkaStream ${kafkaStream.clientId} " +
              s" received offset ${msg.offset} partition ${msg.partition} value ${msg.message}")
        }
      }
  }

  override def supervisorStrategy: SupervisorStrategy = OneForOneStrategy() {
    case e: Exception ⇒
      // handle failing kafka
      log.error(s"Read failed $e")
      Escalate
  }

  /** Actions before the actor starts. */
  override def preStart(): Unit = {
    super.preStart()
    consumer = Try(Consumer.create(consumerConfig))
    consumer match {
      case Success(c) ⇒
        context.parent ! ReadInitialized(self)
        self ! StartConsume
      case Failure(e) ⇒
        log.error(e, "Could not create kafkaConsumer")
        context.parent ! Shutdown
    }
  }

  /** Actions before the actor stops. @throws java.lang.Exception on failure */
  @throws[Exception](classOf[Exception])
  override def postStop(): Unit = {
    if (streams.isDefined) {
      log.debug("cleaning streams")
      streams.get.foreach(_.clear())
      log.debug("cleaned streams")
    }
    log.debug("stopping all consumer")
    consumer.foreach(_.shutdown())
    log.debug("stop all consumer")
    log.debug("shutting down execution")
    executor.shutdown()
    log.debug("shutdown execution")
  }

  /** Consumer configuration. */
  private val consumerConfig: ConsumerConfig = new ConsumerConfig(KafkaConsumerConfig())
}

object NumberConsumerByKafkaStream {
  /** Props wrapping the consumer. @return Props */
  def props: Props = Props(new NumberConsumerByKafkaStream())

  /** Message carrying a KafkaStream to consume. @param kafkaStream the stream */
  private case class Consume(kafkaStream: KafkaStream[String, Long])
}
package com.linewell.akkakafka.kafka.produce

import akka.actor.SupervisorStrategy.Resume
import akka.actor._
import akka.event.LoggingReceive
import com.linewell.akkakafka.common.util.KafkaProducerConfig
import com.linewell.akkakafka.kafka.bean.Command.{StartProduce, WriteInitialized}
import com.typesafe.config.ConfigFactory
import org.apache.kafka.clients.producer._

/** Created by ctao on 16-1-23. */
class NumberProducer extends Actor with ActorLogging {
  private val conf = ConfigFactory.load()

  /** Number of messages to produce. */
  val numMessage = conf.getInt("common.numMessage")
  val topic = conf.getString("consumer.topic")

  override def supervisorStrategy: SupervisorStrategy = OneForOneStrategy() {
    case e: Exception ⇒
      // can handle failing here
      log.error(s"Write failed $e")
      Resume
  }

  private var producer: KafkaProducer[String, Long] = _

  /** Called before the actor starts. */
  override def preStart(): Unit = {
    producer = initProducer()
    context.parent ! WriteInitialized(self)
    self ! StartProduce
  }

  /** On StartProduce, call the produce function. */
  override def receive: Receive = LoggingReceive {
    case StartProduce ⇒ produce(producer, numMessage)
  }

  /** Called before the actor stops. @throws java.lang.Exception on failure */
  @throws[Exception](classOf[Exception])
  override def postStop(): Unit = {
    log.debug("closing producer")
    producer.close()
    log.debug("closed producer")
  }

  /** Produce function. @param producer the producer @param numMessage number of messages */
  private def produce(producer: KafkaProducer[String, Long], numMessage: Int): Unit = {
    (1 to numMessage).foreach { messageNum ⇒
      val message = new ProducerRecord[String, Long](topic, (messageNum + 1).toString, messageNum)
      producer.send(message, new Callback {
        override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = {
          val maybeMetadata = Option(metadata)
          val maybeException = Option(exception)
          if (maybeMetadata.isDefined) {
            log.info(s"actor ${self.path.toString}: $messageNum onCompletion offset ${metadata.offset},partition ${metadata.partition}")
          }
          if (maybeException.isDefined) {
            log.error(exception, s"$messageNum onCompletion received error")
          }
        }
      })
    }
  }

  /** Initialize the producer. @return the producer */
  private def initProducer(): KafkaProducer[String, Long] = {
    log.debug(s"Config ${KafkaProducerConfig()}")
    new KafkaProducer[String, Long](KafkaProducerConfig())
  }
}

object NumberProducer {
  def props: Props = Props(new NumberProducer)
}
package com.linewell.akkakafka.kafka.coordination

import akka.actor.{Actor, ActorLogging, ActorRef}
import akka.stream.ActorMaterializer
import com.linewell.akkakafka.kafka.bean.Command._
import com.linewell.akkakafka.kafka.bean.{Command, Mode}
import com.linewell.akkakafka.kafka.consume.NumberConsumerByKafkaStream
import com.linewell.akkakafka.kafka.produce.NumberProducer
import com.typesafe.config.ConfigFactory

import scala.collection.mutable.ArrayBuffer
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._

/** Created by ctao on 16-1-23. The coordinator. */
class Coordinator extends Actor with ActorLogging {

  import Coordinator._

  val conf = ConfigFactory.load()

  /** Scheduling time, used as a timer. */
  val schedulerTime = conf.getInt("common.scheduler.time")

  /** Queue of writer actors. */
  val writerBuffer: ArrayBuffer[Option[ActorRef]] = new ArrayBuffer[Option[ActorRef]]()

  /** Queue of reader actors. */
  val readerBuffer: ArrayBuffer[Option[ActorRef]] = new ArrayBuffer[Option[ActorRef]]()

  /** Materializer. */
  lazy val mat = ActorMaterializer()(context)

  override def receive: Receive = {
    // A message carrying StartNumber and a mode: call the build method.
    case msg@InitialMessage(StartNumber(num), mode) ⇒
      log.debug(s"Starting the numbers coordinator with $msg")
      buildWriteOrBuildRead(mode, num)

    // Unrecognised initial message.
    case msg: InitialMessage ⇒
      log.error(s"Did not understand $msg")
      log.error("shutdown")
      context.system.shutdown()

    // Each actor reports back to its parent once initialized. On a reader's report,
    // schedule a one-shot timer that sends ourselves a Stop for that actor.
    case ReadInitialized(actorRef) ⇒
      log.debug(s"Reader initialized :${actorRef.path.toString}")
      context.system.scheduler.scheduleOnce(schedulerTime.seconds, self, Stop(actorRef))
      log.debug(s"end scheduler stop ${actorRef.path.toString}")

    // Same for a writer's report: schedule a one-shot Stop for that actor.
    case WriteInitialized(actorRef) ⇒
      log.debug(s"Writer initialized:${actorRef.path.toString}")
      context.system.scheduler.scheduleOnce(schedulerTime.seconds, self, Stop(actorRef))
      log.debug(s"end scheduler stop ${actorRef.path.toString}")

    // On the Stop we sent ourselves, stop that actor and remove it from its queue.
    case Stop(actorRef) ⇒
      log.debug(s"Stopping ${actorRef.path.toString}")
      context.stop(actorRef)
      writerBuffer -= Some(actorRef)
      readerBuffer -= Some(actorRef)
      log.debug(s"writeBuffer.length ${writerBuffer.length} and readerBuffer.length ${readerBuffer.length}")
      // If both queues are empty, send ourselves the system Shutdown message.
      if (writerBuffer.isEmpty && readerBuffer.isEmpty) {
        context.system.scheduler.scheduleOnce(1.seconds, self, Shutdown)
      }

    // Stop the system.
    case Shutdown ⇒
      log.debug("Shutting down the app")
      context.system.shutdown()
      log.info("shutdown the app")
  }

  /** Build actors for the requested mode. @param mode the mode @param numActor number of actors */
  def buildWriteOrBuildRead(mode: Mode, numActor: Int): Unit = mode match {
    // Write mode: add the given number of actors to the write queue.
    case Mode.Write ⇒
      log.debug("write mode")
      (1 to numActor).foreach { x ⇒
        val writer = Some(context.actorOf(NumberProducer.props, name = s"writerActor-$x"))
        writerBuffer += writer
      }

    // Read mode: add the given number of actors to the read queue.
    case Mode.Read ⇒
      log.debug("read mode")
      (1 to numActor).foreach { x ⇒
        val reader = Some(context.actorOf(NumberConsumerByKafkaStream.props, name = s"readerActor-$x"))
        readerBuffer += reader
      }

    // Read-write mode: add the given number of actors to each queue.
    case Mode.Readwrite ⇒
      log.debug("readwrite mode")
      (1 to numActor).foreach { x ⇒
        val writer = Some(context.actorOf(NumberProducer.props, name = s"writerActor-$x"))
        val reader = Some(context.actorOf(NumberConsumerByKafkaStream.props, name = s"readerActor-$x"))
        writerBuffer += writer
        readerBuffer += reader
      }
  }
}

object Coordinator {
  /** Initial message. @param name the command @param mode the mode */
  case class InitialMessage(name: Command, mode: Mode)
}
package com.linewell.akkakafka.kafka.application

import akka.actor.{ActorSystem, Props}
import com.linewell.akkakafka.kafka.bean.Command.StartNumber
import com.linewell.akkakafka.kafka.bean.Mode.{Readwrite, Write, Read}
import com.linewell.akkakafka.kafka.coordination.Coordinator
import com.linewell.akkakafka.kafka.coordination.Coordinator.InitialMessage
import com.typesafe.config.ConfigFactory
import org.slf4j.LoggerFactory

/** Created by ctao on 16-1-23. */
object Application extends App {
  val log = LoggerFactory.getLogger(this.getClass)
  val system = ActorSystem("Kafka")

  /** The coordinator actor. */
  val coordinator = system.actorOf(Props(new Coordinator), name = "coordinator")
  val conf = ConfigFactory.load()
  private val numActor = conf.getInt("common.actor")
  log.info(s"start app")

  /** The app mode; anything other than read/write/readwrite is rejected. */
  val appMode = conf.getString("common.mode").toUpperCase match {
    case "READ" ⇒ Read
    case "WRITE" ⇒ Write
    case "READWRITE" ⇒ Readwrite
    case other ⇒ throw new IllegalArgumentException(s"can't load mode: $other")
  }

  /** Kick off the coordination. */
  coordinator ! InitialMessage(StartNumber(numActor), appMode)
}
package com.linewell.akkakafka.common.util

import java.util.Properties

import com.typesafe.config.ConfigFactory

/** Created by ctao on 16-1-27. Kafka consumer configuration. */
trait KafkaConsumerConfig extends Properties {

  import KafkaConsumerConfig._

  private val consumerPrefixWithDot = consumerPrefix + "."

  val allKeys = Seq(groupId,
    zookeeperConnect,
    zookeeperConnectionTimeOut,
    zookeeperSessionTimeOut,
    reBalanceBackOff,
    reBalanceMaxRetries,
    keyDeserializer,
    valueDeserializer,
    servers
  )

  lazy val conf = ConfigFactory.load()

  // Copy every key present in the config, stripping the "consumer." prefix.
  allKeys.foreach { key ⇒
    if (conf.hasPath(key)) {
      put(key.replace(consumerPrefixWithDot, ""), conf.getString(key))
    }
  }
}

object KafkaConsumerConfig {
  val consumerPrefix = "consumer"

  // Consumer keys
  val groupId = s"$consumerPrefix.group.id"
  val zookeeperConnect = s"$consumerPrefix.zookeeper.connect"
  val topic = s"$consumerPrefix.topic"
  val zookeeperSessionTimeOut = s"$consumerPrefix.zookeeper.session.timeout.ms"
  val zookeeperConnectionTimeOut = s"$consumerPrefix.zookeeper.connection.timeout.ms"
  val reBalanceBackOff = s"$consumerPrefix.rebalance.backoff.ms"
  val reBalanceMaxRetries = s"$consumerPrefix.rebalance.max.retries"
  val keyDeserializer = s"$consumerPrefix.key.deserializer"
  val valueDeserializer = s"$consumerPrefix.value.deserializer"
  val servers = s"$consumerPrefix.bootstrap.servers"

  def apply(): KafkaConsumerConfig = new KafkaConsumerConfig {}
}
package com.linewell.akkakafka.common.util

import java.util.Properties

import com.typesafe.config.ConfigFactory
import org.apache.kafka.clients.producer.ProducerConfig

/** Created by ctao on 16-1-25. Kafka producer configuration. */
trait KafkaProducerConfig extends Properties {

  import KafkaProducerConfig._

  private val producerPrefixWithDot = producerPrefix + "."

  private val allKeys = Seq(
    brokers,
    keySerializer,
    valueSerializer,
    partitioner,
    requiredAcks,
    servers
  )

  lazy val conf = ConfigFactory.load()

  // Copy every key present in the config, stripping the "producer." prefix.
  allKeys.foreach { key ⇒
    if (conf.hasPath(key)) {
      put(key.replace(producerPrefixWithDot, ""), conf.getString(key))
    }
  }
}

object KafkaProducerConfig {
  val producerPrefix = "producer"

  // Producer keys
  val brokers = s"$producerPrefix.metadata.broker.list"
  val keySerializer = s"$producerPrefix.key.serializer"
  val valueSerializer = s"$producerPrefix.value.serializer"
  val servers = s"$producerPrefix.bootstrap.servers"
  val partitioner = s"$producerPrefix.partitioner.class"
  val requiredAcks = s"$producerPrefix.request.required.acks"

  def apply(): KafkaProducerConfig = new KafkaProducerConfig {}
}
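As a quick illustration (a sketch, not part of the original code): both helpers yield a java.util.Properties with the prefix stripped, ready to hand straight to the Kafka clients, e.g.:

object KafkaConfigDemo extends App {
  val consumerProps = KafkaConsumerConfig()               // a java.util.Properties
  println(consumerProps.getProperty("group.id"))          // "mytest", from application.conf
  println(consumerProps.getProperty("bootstrap.servers")) // "ctao-machine:9092"
  println(KafkaProducerConfig().getProperty("key.serializer"))
  // "org.apache.kafka.common.serialization.StringSerializer"
}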
package com.linewell.akkakafka.common.serializer

import java.util

import org.apache.kafka.common.serialization.Serializer

/** Created by ctao on 16-1-26. Serializer for Long, implementing Kafka's Serializer. */
class LongSerializer extends Serializer[Long] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()

  override def serialize(topic: String, data: Long): Array[Byte] = BigInt(data).toByteArray

  override def close(): Unit = ()
}
package com.linewell.akkakafka.common.deserializer

import java.util

import org.apache.kafka.common.serialization.Deserializer

/** Created by ctao on 16-1-26. Deserializer for Long. */
class LongDeserializer extends Deserializer[Long] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()

  override def close(): Unit = ()

  override def deserialize(topic: String, data: Array[Byte]): Long = BigInt(data).toLong
}