Kafka consumption under the actor model

Background

Kafka production and consumption, implemented with Akka.

Design

The application builds a coordinator actor and picks a read/write mode from configuration. The coordinator creates the corresponding producer and consumer actors and adds them to a write buffer and a read buffer. During initialization, each producer/consumer actor sends the coordinator a registration message carrying its own reference; on receiving it, the coordinator starts a one-shot timer that, once the scheduled time elapses, sends a command to stop that actor. When both the write buffer and the read buffer are empty, the whole system shuts down.
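Expressed as messages (the Command types are defined below):

Application        → Coordinator : InitialMessage(StartNumber(n), mode)
Coordinator        → children    : actorOf(producer / consumer Props)
child, in preStart → Coordinator : ReadInitialized(self) / WriteInitialized(self)
Coordinator        → self        : Stop(child), after scheduler.time seconds
Coordinator        → self        : Shutdown, once both buffers are empty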

Caveats

KafkaStream iteration blocks its thread, so it has to run on a separate thread pool; everything else is straightforward.
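The full version appears in the KafkaStream consumer below; the core of the pattern is just to hand the blocking loop its own ExecutionContext (a minimal sketch, names are illustrative):

import java.util.concurrent.Executors
import scala.concurrent.{ExecutionContext, Future}

// a dedicated pool, so the blocking iteration never ties up the actor dispatcher
val blockingPool = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(10))

def runBlocking(body: ⇒ Unit): Future[Unit] = Future(body)(blockingPool)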

Common setup

Configuration uses Typesafe Config; logging uses logback.
application.conf:

akka {
  loggers = ["akka.event.slf4j.Slf4jLogger"]
  loglevel = "INFO"
  logging-filter = "akka.event.slf4j.Slf4jLoggingFilter"
  log-dead-letters = 0
}

common {
  numMessage = "20000" #默认发送的消息数
  scheduler.time = "20" #默认的调度实践
  mode = "readwrite" #模式,分为write,read和readwrite三种模式
  threadNum = "10" #提供的资源池的线程数,主要防止kafkaconsumer造成的阻塞
  actor = "10"
  timeout = "100"
}


consumer {
  bootstrap.servers = "ctao-machine:9092"
  group.id = "mytest"
  zookeeper.connect = "ctao-machine:2181"
  host = "ctao-machine"
  port = "2181"
  bufferSize = "100"
  clientId = "typesafe"
  topic = "testctao"
  zookeeper.sync.time.ms = "200"
  auto.commit.interval.ms = "1000"
  zookeeper.session.timeout.ms = "5000"
  zookeeper.connection.timeout.ms = "10000"
  rebalance.backoff.ms = "2000"
  rebalance.max.retries = "10"
  key.deserializer = "org.apache.kafka.common.serialization.StringDeserializer"
  value.deserializer = "com.linewell.akkakafka.common.deserializer.LongDeserializer"
}

producer {
  metadata.broker.list = "ctao-machine:9092"
  key.serializer = "org.apache.kafka.common.serialization.StringSerializer"
  value.serializer = "com.linewell.akkakafka.common.serializer.LongSerializer"
  bootstrap.servers = "ctao-machine:9092"
}
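These values are read at runtime with Typesafe Config, exactly as the actors below do:

import com.typesafe.config.ConfigFactory

val conf = ConfigFactory.load() // loads application.conf from the classpath
val numMessage = conf.getInt("common.numMessage") // 20000
val mode = conf.getString("common.mode") // "readwrite"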

Logging, configured in logback.xml:

<configuration>
    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>%d{HH:mm:ss.SSS} %-5level %logger{36} - %msg%n%rEx</pattern>
        </encoder>
    </appender>

    <logger name="org.apache" level="WARN"/>
    <logger name="kafka" level="OFF"/>
    <logger name="kafka.network.Processor" level="OFF"/>
    <logger name="org.apache.zookeeper.jmx" level="ERROR"/>
    <logger name="kafka.server.KafkaApis" level="OFF"/>
    <logger name="kafka.producer.async.DefaultEventHandler" level="OFF"/>
    <logger name="org.I0Itec" level="WARN"/>
    <logger name="com.linewell" level="DEBUG"/>

    <root level="ERROR">
        <appender-ref ref="STDOUT"/>
    </root>
</configuration>

Modes

There are three modes: read, write, and read-write.


package com.linewell.akkakafka.kafka.bean

/**
  * Created by ctao on 16-1-23.
  * Operating modes
  */
sealed trait Mode

object Mode {

  /**
    * Read mode
    */
  case object Read extends Mode

  /**
    * Write mode
    */
  case object Write extends Mode

  /**
    * Mixed read-write mode
    */
  case object Readwrite extends Mode
}

Command types

package com.linewell.akkakafka.kafka.bean

import akka.actor.ActorRef

/**
  * Created by ctao on 16-1-23.
  * Command protocol
  */
sealed trait Command

object Command {

  /**
    * A read actor has finished initializing
    * @param actorRef the actor's reference
    */
  case class ReadInitialized(actorRef: ActorRef) extends Command

  /**
    * A write actor has finished initializing
    * @param actorRef the actor's reference
    */
  case class WriteInitialized(actorRef: ActorRef) extends Command

  /**
    * Stop an actor
    * @param actorRef the actor's reference
    */
  case class Stop(actorRef: ActorRef) extends Command

  /**
    * Stop the whole system
    */
  case object Shutdown extends Command

  /**
    * Start the job
    * @param num initial number of actors
    */
  case class StartNumber(num: Int) extends Command

  /**
    * Start consuming
    */
  case object StartConsume extends Command

  /**
    * Start producing
    */
  case object StartProduce extends Command

}

Consumers

First, the consumer based on the new Kafka client:

package com.linewell.akkakafka.kafka.consume

import akka.actor.SupervisorStrategy.Escalate
import akka.actor._
import com.linewell.akkakafka.common.util.KafkaConsumerConfig
import com.linewell.akkakafka.kafka.bean.Command.{ReadInitialized, StartConsume}
import com.typesafe.config.ConfigFactory
import org.apache.kafka.clients.consumer.KafkaConsumer

import scala.collection.JavaConversions._

/**
  * Created by ctao on 16-1-27.
  * Consumer implemented with the new KafkaConsumer client
  */
class NumberConsumerByKafkaClient extends Actor with ActorLogging with NumberConsume {

  /**
    * The Kafka consumer instance
    */
  private var consumer: KafkaConsumer[String, Long] = _
  private lazy val conf = ConfigFactory.load()
  private val topic = conf.getString("consumer.topic")
  private val timeOut = conf.getLong("common.timeout")

  /**
    * Called before the actor starts
    */
  override def preStart(): Unit = {
    initConsumer()
    context.parent ! ReadInitialized(self)
    self ! StartConsume
  }

  /**
    * On StartConsume, poll one batch and immediately schedule the next poll
    */
  override def receive: Receive = {
    case StartConsume ⇒
      consume(timeOut)
      self ! StartConsume // keep polling until the coordinator stops this actor
  }

  /**
    * Called before the actor stops
    * @throws java.lang.Exception on failure
    */
  @throws[Exception](classOf[Exception])
  override def postStop(): Unit = {
    log.debug("stopping all consumer")

    // unsubscribe from the topic
    consumer.unsubscribe()

    // close the consumer
    consumer.close()
    log.debug("stopped all consumer")
  }


  /**
    * Poll one batch of records, log them and commit asynchronously
    * @param timeOut poll timeout in ms
    */
  private def consume(timeOut: Long): Unit = {
    consumer.poll(timeOut).foreach { record ⇒
      log.info(s"${self.path.toString} receive ${record.key} value ${record.value} " +
        s"offset ${record.offset} partition ${record.partition} topic ${record.topic}")
    }
    consumer.commitAsync()

  }

  /**
    * Supervision strategy
    */
  override def supervisorStrategy: SupervisorStrategy = OneForOneStrategy() {
    case e: Exception ⇒
      //handle failing kafka
      log.error(s"Read failed $e")
      Escalate
  }

  /**
    * Initialize the consumer and subscribe to the topic
    */
  private def initConsumer(): Unit = {
    log.debug(s"Config ${KafkaConsumerConfig()}")
    consumer = new KafkaConsumer[String, Long](KafkaConsumerConfig())
    consumer.subscribe(Vector(topic))
  }
}


object NumberConsumerByKafkaClient {
  /**
    * Props for the consumer actor
    * @return Props containing the actor
    */
  def props: Props = Props(new NumberConsumerByKafkaClient)
}
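The coordinator shown later builds the KafkaStream-based consumer; to exercise this client-based one instead, the swap is a single Props change in Coordinator.buildWriteOrBuildRead:

val reader = Some(context.actorOf(NumberConsumerByKafkaClient.props, name = s"readerActor-$x"))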

And the alternative based on the blocking KafkaStream:

package com.linewell.akkakafka.kafka.consume

import java.util.concurrent.Executors

import akka.actor.SupervisorStrategy.Escalate
import akka.actor._
import com.linewell.akkakafka.common.deserializer.{Decoder, LongDeserializer}
import com.linewell.akkakafka.common.util.KafkaConsumerConfig
import com.linewell.akkakafka.kafka.bean.Command._
import com.linewell.akkakafka.kafka.consume.NumberConsumerByKafkaStream.Consume
import com.typesafe.config.ConfigFactory
import kafka.consumer.{Consumer, ConsumerConfig, ConsumerConnector, KafkaStream}
import kafka.message.MessageAndMetadata
import org.apache.kafka.common.serialization.StringDeserializer
import scala.async.Async._
import scala.concurrent.{ExecutionContext, ExecutionContextExecutor}
import scala.util.{Failure, Success, Try}

/**
  * Created by ctao on 16-1-23.
  * Consumer implemented with KafkaStream; the stream iteration blocks
  */
class NumberConsumerByKafkaStream extends Actor with ActorLogging {

  val conf = ConfigFactory.load()
  /**
    * Consumer connector
    */
  private var consumer: Try[ConsumerConnector] = _
  /**
    * Thread-pool size; the blocking Kafka consumption runs on this separate pool
    */
  val threadNum = conf.getInt("common.threadNum")
  private val executor = Executors.newFixedThreadPool(threadNum)
  val topic = conf.getString("consumer.topic")
  /**
    * The KafkaStreams; cleared before shutdown
    */
  private var streams: Option[List[KafkaStream[String, Long]]] = None

  override def receive: Receive = {
    /**
      * On StartConsume, create the message streams and send each one to self
      */
    case StartConsume ⇒ consumer.foreach { (consumer: ConsumerConnector) ⇒
      val consumerStreams = consumer.createMessageStreams(Map(topic → 1)
        , Decoder(topic, new StringDeserializer),
        Decoder(topic, new LongDeserializer))

      streams = Option(consumerStreams(topic))
      if (streams.isDefined) {
        log.info(s"Got streams ${streams.get.length} $streams")
        streams.get.foreach { kafkaStream ⇒
          self ! Consume(kafkaStream)

        }
      }
    }

    /**
      * On Consume, run the blocking stream iteration on the dedicated pool
      */
    case Consume(kafkaStream) ⇒
      log.info(s"Handling KafkaStream ${kafkaStream.clientId}")
      implicit val executionContextExecutor: ExecutionContextExecutor = ExecutionContext.fromExecutor(executor)
      async {
        kafkaStream.iterator().foreach {
          case msg: MessageAndMetadata[String, Long] ⇒
            log.info(s"${self.path.toString} : kafkaStream ${kafkaStream.clientId} " +
              s" received offset ${msg.offset}  partition ${msg.partition} value ${msg.message}")
        }
      }


  }


  override def supervisorStrategy: SupervisorStrategy = OneForOneStrategy() {
    case e: Exception ⇒
      //handle failing kafka
      log.error(s"Read failed $e")
      Escalate
  }

  /**
    * Called before the actor starts
    */
  override def preStart(): Unit = {
    super.preStart()
    consumer = Try(Consumer.create(consumerConfig))
    consumer match {
      case Success(c) ⇒ context.parent ! ReadInitialized(self)
        self ! StartConsume
      case Failure(e) ⇒
        log.error(e, "Could not create kafkaConsumer")
        context.parent ! Shutdown
    }


  }

  /**
    * Called before the actor stops
    * @throws java.lang.Exception on failure
    */
  @throws[Exception](classOf[Exception])
  override def postStop(): Unit = {
    if (streams.isDefined) {
      log.debug("cleaning streams")
      streams.get.foreach(_.clear())
      log.debug("cleaned streams")
    }
    log.debug("stopping all consumer")
    consumer.foreach(_.shutdown())
    log.debug("stop all consumer")
    log.debug("shutting down execution")
    executor.shutdown()
    log.debug("shutdown execution")
  }

  /**
    * Consumer configuration
    */
  private val consumerConfig: ConsumerConfig = new ConsumerConfig(KafkaConsumerConfig())

}


object NumberConsumerByKafkaStream {

  /**
    * @return Props containing the consumer actor
    */
  def props: Props = Props(new NumberConsumerByKafkaStream())

  /**
    * Message wrapping the KafkaStream to consume
    * @param kafkaStream the stream
    */
  private case class Consume(kafkaStream: KafkaStream[String, Long])

}

Producer

package com.linewell.akkakafka.kafka.produce

import akka.actor.SupervisorStrategy.Resume
import akka.actor._
import akka.event.LoggingReceive
import com.linewell.akkakafka.common.util.KafkaProducerConfig
import com.linewell.akkakafka.kafka.bean.Command.{StartProduce, WriteInitialized}
import com.typesafe.config.ConfigFactory
import org.apache.kafka.clients.producer._


/**
  * Created by ctao on 16-1-23.
  */
class NumberProducer extends Actor with ActorLogging {
  private val conf = ConfigFactory.load()
  /**
    * Number of messages to produce
    */
  val numMessage = conf.getInt("common.numMessage")

  val topic = conf.getString("consumer.topic")


  override def supervisorStrategy: SupervisorStrategy = OneForOneStrategy() {
    case e: Exception ⇒
      //can handle failing here
      log.error(s"Write failed $e")
      Resume
  }

  private var producer: KafkaProducer[String, Long] = _

  /**
    * Called before the actor starts
    */
  override def preStart(): Unit = {
    producer = initProducer()
    context.parent ! WriteInitialized(self)
    self ! StartProduce
  }

  /**
    * On StartProduce, call the produce function
    */
  override def receive: Receive = LoggingReceive {
    case StartProduce ⇒ produce(producer, numMessage)
  }

  /**
    * Called before the actor stops
    * @throws java.lang.Exception on failure
    */
  @throws[Exception](classOf[Exception])
  override def postStop(): Unit = {
    log.debug("closing producer")
    producer.close()
    log.debug("closed producer")
  }

  /**
    * Produce messages
    * @param producer the producer
    * @param numMessage number of messages to send
    */
  private def produce(producer: KafkaProducer[String, Long], numMessage: Int): Unit = {
    (1 to numMessage).foreach { messageNum ⇒
      val message = new ProducerRecord[String, Long](topic, (messageNum + 1).toString, messageNum)
      producer.send(message, new Callback {
        override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = {
          val maybeMetadata = Option(metadata)
          val maybeException = Option(exception)
          if (maybeMetadata.isDefined) {
            log.info(s"actor ${self.path.toString}: $messageNum onCompletion offset ${metadata.offset},partition ${metadata.partition}")
          }
          if (maybeException.isDefined) {
            log.error(exception, s"$messageNum onCompletion received error")
          }
        }
      })


    }
  }

  /**
    * Initialize the producer
    * @return the producer
    */
  private def initProducer(): KafkaProducer[String, Long] = {
    log.debug(s"Config ${KafkaProducerConfig()}")
    new KafkaProducer[String, Long](KafkaProducerConfig())

  }
}


object NumberProducer {
  def props: Props = Props(new NumberProducer)
}

Coordination


package com.linewell.akkakafka.kafka.coordination

import akka.actor.{Actor, ActorLogging, ActorRef}
import akka.stream.ActorMaterializer
import com.linewell.akkakafka.kafka.bean.Command._
import com.linewell.akkakafka.kafka.bean.{Command, Mode}
import com.linewell.akkakafka.kafka.consume.NumberConsumerByKafkaStream
import com.linewell.akkakafka.kafka.produce.NumberProducer
import com.typesafe.config.ConfigFactory

import scala.collection.mutable.ArrayBuffer
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._

/**
  * Created by ctao on 16-1-23.
  * Coordinator actor
  */
class Coordinator extends Actor with ActorLogging {

  import Coordinator._

  val conf = ConfigFactory.load()

  /**
    * Scheduler delay; acts like a timer
    */
  val schedulerTime = conf.getInt("common.scheduler.time")

  /**
    * Buffer of writer actors
    */
  val writerBuffer: ArrayBuffer[Option[ActorRef]] = new ArrayBuffer[Option[ActorRef]]()

  /**
    * Buffer of reader actors
    */
  val readerBuffer: ArrayBuffer[Option[ActorRef]] = new ArrayBuffer[Option[ActorRef]]()

  /**
    * Stream materializer (lazy)
    */
  lazy val mat = ActorMaterializer()(context)

  override def receive: Receive = {
    /**
      * An InitialMessage carrying StartNumber and a mode: build the actors
      */
    case msg@InitialMessage(StartNumber(num), mode) ⇒
      log.debug(s"Starting the numbers coordinator with $msg")
      buildWriteOrBuildRead(mode, num)

    /**
      * Unrecognized message
      */
    case msg: InitialMessage ⇒
      log.error(s"Did not understand $msg")
      log.error("shutdown")
      context.system.shutdown()

    /**
      * Each child reports back once initialized; on ReadInitialized, schedule
      * a one-shot Stop(actor) message to self after the configured delay
      */
    case ReadInitialized(actorRef) ⇒
      log.debug(s"Reader initialized :${actorRef.path.toString}")
      context.system.scheduler.scheduleOnce(schedulerTime.seconds, self, Stop(actorRef))
      log.debug(s"end scheduler stop ${actorRef.path.toString}")
    /**
      * Same for writers: on WriteInitialized, schedule a one-shot Stop(actor)
      * message to self after the configured delay
      */
    case WriteInitialized(actorRef) ⇒
      log.debug(s"Writer initialized:${actorRef.path.toString}")
      context.system.scheduler.scheduleOnce(schedulerTime.seconds, self, Stop(actorRef))
      log.debug(s"end scheduler stop ${actorRef.path.toString}")

    /**
      * On the self-sent Stop, stop the actor and remove it from its buffer
      */
    case Stop(actorRef) ⇒
      log.debug(s"Stopping ${actorRef.path.toString}")
      context.stop(actorRef)

      writerBuffer -= Some(actorRef)
      readerBuffer -= Some(actorRef)

      log.debug(s"writeBuffer.length ${writerBuffer.length} and readerBuffer.length ${readerBuffer.length}")

      /**
        * If both buffers are empty, schedule a Shutdown message to self
        */
      if (writerBuffer.isEmpty && readerBuffer.isEmpty) {
        context.system.scheduler.scheduleOnce(1.seconds, self, Shutdown)
      }

    /**
      * Shut down the system
      */
    case Shutdown ⇒
      log.debug("Shutting down the app")
      context.system.shutdown()
      log.info("shutdown the app")
  }


  /**
    * Build actors for the requested mode
    * @param mode the mode
    * @param numActor number of actors per mode
    */
  def buildWriteOrBuildRead(mode: Mode, numActor: Int): Unit = mode match {
    /**
      * Write mode: add the requested number of writers to the write buffer
      */
    case Mode.Write ⇒
      log.debug("write mode")
      (1 to numActor).foreach { x ⇒
        val writer = Some(context.actorOf(NumberProducer.props, name = s"writerActor-$x"))
        writerBuffer += writer
      }

    /**
      * Read mode: add the requested number of readers to the read buffer
      */
    case Mode.Read ⇒
      log.debug("read mode")
      (1 to numActor).foreach { x ⇒
        val reader = Some(context.actorOf(NumberConsumerByKafkaStream.props, name = s"readerActor-$x"))
        readerBuffer += reader
      }

    /**
      * Read-write mode: add readers and writers to both buffers
      */
    case Mode.Readwrite ⇒
      log.debug("readwrite mode")
      (1 to numActor).foreach { x ⇒
        val writer = Some(context.actorOf(NumberProducer.props, name = s"writerActor-$x"))
        val reader = Some(context.actorOf(NumberConsumerByKafkaStream.props, name = s"readerActor-$x"))
        writerBuffer += writer
        readerBuffer += reader
      }


  }
}


object Coordinator {

  /**
    * Initialization message
    * @param name the start command
    * @param mode the mode
    */
  case class InitialMessage(name: Command, mode: Mode)

}

Application


package com.linewell.akkakafka.kafka.application

import akka.actor.{ActorSystem, Props}
import com.linewell.akkakafka.kafka.bean.Command.StartNumber
import com.linewell.akkakafka.kafka.bean.Mode.{Readwrite, Write, Read}
import com.linewell.akkakafka.kafka.coordination.Coordinator
import com.linewell.akkakafka.kafka.coordination.Coordinator.InitialMessage
import com.typesafe.config.ConfigFactory
import org.slf4j.LoggerFactory

/**
  * Created by ctao on 16-1-23.
  */
object Application extends App {

  val log = LoggerFactory.getLogger(this.getClass)

  val system = ActorSystem("Kafka")

  /**
    * The coordinator actor
    */
  val coordinator = system.actorOf(Props(new Coordinator), name = "coordinator")

  val conf = ConfigFactory.load()
  private val numActor = conf.getInt("common.actor")
  log.info(s"start app")

  /**
    * Application mode
    */
  val appMode = conf.getString("common.mode").toUpperCase match {
    case "READ" ⇒ Read
    case "WRITE" ⇒ Write
    case "READWRITE" ⇒ Readwrite
    case other ⇒ throw new IllegalArgumentException(s"can't load mode: $other")
  }

  /**
    * Kick off the coordination
    */

  coordinator ! InitialMessage(StartNumber(numActor),appMode)

}
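For completeness, the pieces above pull in roughly these dependencies. This is a sketch: the artifact names follow from the imports used here, but the version numbers are assumptions matching the early-2016 APIs (Akka 2.4.x, Kafka 0.9, pre-1.0 akka-stream) and may need adjusting:

libraryDependencies ++= Seq(
  "com.typesafe.akka"      %% "akka-actor"               % "2.4.1",
  "com.typesafe.akka"      %% "akka-slf4j"               % "2.4.1",
  "com.typesafe.akka"      %% "akka-stream-experimental" % "2.0.1",
  "org.apache.kafka"        % "kafka-clients"            % "0.9.0.0", // new KafkaConsumer/KafkaProducer
  "org.apache.kafka"       %% "kafka"                    % "0.9.0.0", // old KafkaStream consumer
  "org.scala-lang.modules" %% "scala-async"              % "0.9.5",
  "com.typesafe"            % "config"                   % "1.3.0",
  "ch.qos.logback"          % "logback-classic"          % "1.1.3"
)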

Kafka configuration classes


package com.linewell.akkakafka.common.util

import java.util.Properties

import com.typesafe.config.ConfigFactory

/**
  * Created by ctao on 16-1-27.
  * Kafka consumer configuration
  */
trait KafkaConsumerConfig extends Properties {

  import KafkaConsumerConfig._

  private val consumerPrefixWithDot = consumerPrefix + "."

  val allKeys = Seq(groupId,
    zookeeperConnect,
    zookeeperConnectionTimeOut,
    zookeeperSessionTimeOut,
    reBalanceBackOff,
    reBalanceMaxRetries,
    keyDeserializer,
    valueDeserializer,
    servers
  )

  lazy val conf = ConfigFactory.load()

  allKeys.foreach { key ⇒
    if (conf.hasPath(key)) {
      put(key.replace(consumerPrefixWithDot, ""), conf.getString(key))
    }
  }

}


object KafkaConsumerConfig {

  val consumerPrefix = "consumer"
  //Consumer Keys
  val groupId = s"$consumerPrefix.group.id"
  val zookeeperConnect = s"$consumerPrefix.zookeeper.connect"
  val topic = s"$consumerPrefix.topic"
  val zookeeperSessionTimeOut = s"$consumerPrefix.zookeeper.session.timeout.ms"
  val zookeeperConnectionTimeOut = s"$consumerPrefix.zookeeper.connection.timeout.ms"
  val reBalanceBackOff = s"$consumerPrefix.rebalance.backoff.ms"
  val reBalanceMaxRetries = s"$consumerPrefix.rebalance.max.retries"
  val keyDeserializer = s"$consumerPrefix.key.deserializer"
  val valueDeserializer = s"$consumerPrefix.value.deserializer"
  val servers = s"$consumerPrefix.bootstrap.servers"
  def apply(): KafkaConsumerConfig = new KafkaConsumerConfig {}
}
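The trait strips the consumer. prefix when copying keys into the Properties, so consumer.group.id in application.conf becomes the group.id entry the Kafka client expects. A quick check against the sample config above:

val props = KafkaConsumerConfig()
assert(props.getProperty("group.id") == "mytest")
assert(props.getProperty("zookeeper.connect") == "ctao-machine:2181")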


package com.linewell.akkakafka.common.util

import java.util.Properties

import com.typesafe.config.ConfigFactory
import org.apache.kafka.clients.producer.ProducerConfig

/**
  * Created by ctao on 16-1-25.
  * Kafka producer configuration
  */
trait KafkaProducerConfig extends Properties {

  import KafkaProducerConfig._

  private val producerPrefixWithDot = producerPrefix + "."

  private val allKeys = Seq(
    brokers,
    keySerializer,
    valueSerializer,
    partitioner,
    requiredAcks,
    servers
  )

  lazy val conf = ConfigFactory.load()
  allKeys.foreach { key ⇒
    if (conf.hasPath(key)) {
      put(key.replace(producerPrefixWithDot, ""), conf.getString(key))
    }
  }
}

object KafkaProducerConfig {

  val producerPrefix = "producer"


  //Producer Keys
  val brokers = s"$producerPrefix.metadata.broker.list"
  val keySerializer = s"$producerPrefix.key.serializer"
  val valueSerializer = s"$producerPrefix.value.serializer"
  val servers = s"$producerPrefix.bootstrap.servers"
  val partitioner = s"$producerPrefix.partitioner.class"
  val requiredAcks = s"$producerPrefix.request.required.acks"


  def apply(): KafkaProducerConfig = new KafkaProducerConfig {}

}

Serialization and deserialization

package com.linewell.akkakafka.common.serializer

import java.util

import org.apache.kafka.common.serialization.Serializer

/**
  * Created by ctao on 16-1-26.
  * Long serializer, implementing Kafka's Serializer interface
  */
class LongSerializer extends Serializer[Long] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()

  override def serialize(topic: String, data: Long): Array[Byte] = BigInt(data).toByteArray

  override def close(): Unit = ()
}



package com.linewell.akkakafka.common.deserializer

import java.util

import org.apache.kafka.common.serialization.Deserializer

/**
  * Created by ctao on 16-1-26.
  * Long deserializer
  */
class LongDeserializer extends Deserializer[Long] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()

  override def close(): Unit = ()

  override def deserialize(topic: String, data: Array[Byte]): Long = BigInt(data).toLong
}
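A quick round-trip check of the BigInt-based encoding (toByteArray produces the two's-complement bytes that BigInt(bytes).toLong reads back, so the pair are inverses):

val ser = new LongSerializer
val de = new LongDeserializer
val n = 123456789L
assert(de.deserialize("testctao", ser.serialize("testctao", n)) == n)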

my github
