Building kafka+ELK with Docker on Three Physical Machines

Building kafka+ELK with Docker

  • Setting up zookeeper+kafka
    • Setting up zookeeper
      • Download the zk and kafka images
      • Modify the zoo.cfg configuration file
      • Create the data mount directories and the myid file
      • Change the zookeeper runtime log location
      • Start zookeeper: the other two nodes follow the same steps
      • Verify zk
    • Start kafka
  • Setting up ES
  • Setting up logstash
    • Hosts 192.168.9.203 and 192.168.9.204
  • How kafka+logstash high availability and load balancing work

Setting up zookeeper+kafka

Setting up zookeeper

Download the zk and kafka images

[root@elk ~]# docker pull wurstmeister/zookeeper:latest
[root@elk ~]# docker pull wurstmeister/kafka:latest

Modify the zoo.cfg configuration file

# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial 
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between 
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just 
# example sakes.
dataDir=/opt/zookeeper-3.4.13/data
dataLogDir=/opt/zookeeper-3.4.13/datalog
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the 
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
server.3=192.168.9.203:2888:3888
server.4=192.168.9.204:2888:3888
server.5=192.168.9.205:2888:3888

Create the data mount directories and the myid file

mkdir -p zookeeper/{conf,data,log}
echo 4 > zookeeper/data/myid

Change the zookeeper runtime log location

zookeeper.out is the runtime log file; its behavior is configured through conf/log4j.properties.

# Original configuration
zookeeper.root.logger=INFO, CONSOLE
log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender

# Modified configuration
zookeeper.root.logger=INFO, ROLLINGFILE
log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender

Modify bin/zkEnv.sh

# Original configuration
if [ "x${ZOO_LOG_DIR}" = "x" ]
then
    ZOO_LOG_DIR="."
fi

if [ "x${ZOO_LOG4J_PROP}" = "x" ]
then
    ZOO_LOG4J_PROP="INFO,CONSOLE"
fi


# Modified configuration
if [ "x${ZOO_LOG_DIR}" = "x" ]
then
    ZOO_LOG_DIR="/opt/zookeeper-3.4.13/datalog/"
fi

if [ "x${ZOO_LOG4J_PROP}" = "x" ]
then
    ZOO_LOG4J_PROP="INFO,ROLLINGFILE"
fi

Start zookeeper: the other two nodes follow the same steps

docker run -d -p 2181:2181 -p 2888:2888 -p 3888:3888 --name zookeeper204 --network host -v /opt/zookeeper/log/:/opt/zookeeper-3.4.13/datalog/ -v /opt/zookeeper/conf/zoo.cfg:/opt/zookeeper-3.4.13/conf/zoo.cfg -v /opt/zookeeper/data/myid:/opt/zookeeper-3.4.13/data/myid -it 192.168.9.203:5000/zookeeper:3.4.13

Verify zk

Enter the zk container and query the ensemble status.
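
A minimal way to get a shell inside the container (the container name matches the one used in the docker run command above; use /bin/sh if bash is not available in the image):

docker exec -it zookeeper204 /bin/bash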

root@localhost:/opt/zookeeper-3.4.13# echo stat|nc 192.168.9.203 2181
Zookeeper version: 3.4.13-2d71af4dbe22557fda74f9a9b4309b15a7487f03, built on 06/29/2018 04:05 GMT
Clients:
 /192.168.9.204:50736[0](queued=0,recved=1,sent=0)
 /192.168.9.203:38918[1](queued=0,recved=10462,sent=10466)

Latency min/avg/max: 0/0/13
Received: 10488
Sent: 10491
Connections: 2
Outstanding: 0
Zxid: 0x200000078
Mode: leader
Node count: 36
Proposal sizes last/min/max: 32/32/332
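
The node queried above reports Mode: leader; the other two nodes should report follower. A quick check from any host, using the same stat command:

echo stat | nc 192.168.9.204 2181 | grep Mode
echo stat | nc 192.168.9.205 2181 | grep Mode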

Start kafka

zks="192.168.9.203:2181,192.168.9.204:2181,192.168.9.205:2181"; docker run -p 9092:9092 --name kafka204 --network host -d -e KAFKA_BROKER_ID=204 -e KAFKA_ZOOKEEPER_CONNECT=${zks} -e KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://192.168.9.204:9092 -e KAFKA_LISTENERS=PLAINTEXT://0.0.0.0:9092 -v /opt/kafka/log/:/opt/kafka/logs/ -v /opt/kafka/data/:/kafka/kafka-logs-elk/ 192.168.9.203:5000/kafka:2.12

Finally, create a topic and verify the brokers by producing and consuming a few messages, for example as sketched below.
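
A hedged sketch of that verification, run from inside one of the kafka containers (script names and PATH layout assume the wurstmeister/kafka image; the topic name test203 matches the logstash input used later):

docker exec -it kafka204 bash
# create a topic with 3 partitions across the cluster
kafka-topics.sh --create --zookeeper 192.168.9.203:2181,192.168.9.204:2181,192.168.9.205:2181 --replication-factor 2 --partitions 3 --topic test203
# produce a few test messages
kafka-console-producer.sh --broker-list 192.168.9.204:9092 --topic test203
# in a second shell, consume them back
kafka-console-consumer.sh --bootstrap-server 192.168.9.204:9092 --topic test203 --from-beginning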

Setting up ES

Download the image

docker pull elasticsearch:6.8.6

Create the mount directories

mkdir -p elasticsearch/{data,logs,config}

Adjust the vm.max_map_count kernel parameter

vim /etc/sysctl.conf
## append vm.max_map_count=262144 at the end of the file
# sysctl settings are defined through files in
# /usr/lib/sysctl.d/, /run/sysctl.d/, and /etc/sysctl.d/.
#
# Vendors settings live in /usr/lib/sysctl.d/.
# To override a whole file, create a new file with the same in
# /etc/sysctl.d/ and put new settings there. To override
# only specific settings, add a file with a lexically later
# name in /etc/sysctl.d/ and put new settings there.
#
# For more information, see sysctl.conf(5) and sysctl.d(5).
vm.max_map_count=262144

sysctl -p
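
To confirm the setting took effect (a quick optional check):

sysctl vm.max_map_count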

Modify elasticsearch.yml and jvm.options

# elasticsearch.yml
cluster.name: es-test
node.name: node-204
node.master: true
node.data: true
path.data: /usr/share/elasticsearch/data
path.logs: /usr/share/elasticsearch/logs
#path.plugins: /usr/share/elasticsearch/plugins
http.port: 9200
network.host: 192.168.9.204
discovery.zen.ping.unicast.hosts: ["192.168.9.203:9300","192.168.9.204:9300","192.168.9.205:9300"]

# jvm.options
-Xms2g
-Xmx2g

Start the container

docker run -d -p 9200:9200 -p 9300:9300 --name es204 --network host  -v /opt/elasticsearch/config/jvm.options:/usr/share/elasticsearch/config/jvm.options  -v /opt/elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml -v /opt/elasticsearch/data/:/usr/share/elasticsearch/data/ -v /opt/elasticsearch/logs/:/usr/share/elasticsearch/logs/ 192.168.9.203:5000/es:6.8.6

The other two hosts are configured the same way.
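
Once all nodes are started, a quick check that the cluster has formed (any node's HTTP address works; IPs here follow the example hosts above):

curl 'http://192.168.9.204:9200/_cat/nodes?v'
curl 'http://192.168.9.204:9200/_cluster/health?pretty'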

Setting up logstash

Create the mount directories

Hosts 192.168.9.203 and 192.168.9.204

In conf.d/logstash-test.conf, hosts 203 and 204 must use the same kafka consumer group id (group_id => "wwh01"), while the es index each host writes to should differ. The shared group id is what enables the load balancing explained in the final section; a sketch of the companion configuration for the other host follows the example below.

mkdir -p logstash/{config/conf.d,logs,pipeline}

Modify logstash.yml and conf.d/logstash-test.conf

# logstash.yml
config:
  reload:
    automatic: true
    interval: 3s
xpack:
  management.enabled: false
  monitoring.enabled: false
path.config: /usr/share/logstash/config/conf.d/*.conf
path.logs: /usr/share/logstash/logs

# conf.d/logstash-test.conf
input {
    kafka {
        topics => "test203"
        group_id => "wwh01"
        type => "kafka204"
        bootstrap_servers => "192.168.9.203:9092,192.168.9.204:9092,192.168.9.205:9092"
        codec => "json"
    }
}

output {
    elasticsearch {
        hosts => ["192.168.9.203:9200"]
        index => "kafka-204-%{+YYYY.MM.dd}"
    }
    stdout { codec => rubydebug }
}
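
For reference, a hedged sketch of the companion pipeline on the other host: the group_id stays "wwh01" so the two logstash instances split the topic's partitions between them, while the type tag and index name are changed so the two outputs can be told apart (the exact names here are illustrative, not taken from the original setup):

input {
    kafka {
        topics => "test203"
        group_id => "wwh01"                  # same group: partitions are shared between the two instances
        type => "kafka203"                   # illustrative per-host tag
        bootstrap_servers => "192.168.9.203:9092,192.168.9.204:9092,192.168.9.205:9092"
        codec => "json"
    }
}

output {
    elasticsearch {
        hosts => ["192.168.9.204:9200"]
        index => "kafka-203-%{+YYYY.MM.dd}"  # index name differs from the other host
    }
    stdout { codec => rubydebug }
}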

Start the container

docker run -d --restart=always --privileged=true --name logstash203 -p 5044:5044 -p 5047:5047 -p 9600:9600 --network host -v /usr/local/logstash/config/:/usr/share/logstash/config  192.168.9.203:5000/logstash:6.8.6
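
Once the container is up, the logstash node API (port 9600, exposed above) gives a quick health check; a hedged example, run on the logstash host itself since the API listens on localhost by default:

curl 'http://127.0.0.1:9600/?pretty'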

How kafka+logstash high availability and load balancing work

Kafka has the concept of consumer groups. Each partition of a topic can be consumed by only one consumer within the same group. For example, if a topic has three partitions and the group has two consumers, one consumer ends up with two partitions and the other with the remaining one; two consumers in the same group never consume the same partition at the same time. This is what provides the load balancing. The partition-assignment (range) algorithm works as follows:

  1. A = (number of partitions / number of consumers in the group)
  2. M = A rounded up to the next integer (the ceiling of A)
  3. The set of partitions pulled by consumer i: Ci = [P(M*i) ~ P((i+1)*M - 1)]

Using the example above, with two consumers and three partitions:
A = 3/2 = 1.5

M = 2
Partitions consumed by the first consumer C0:
C0 = [P(2*0) ~ P((0+1)*2-1)]
   = [P(0) ~ P(1)]
so the first consumer consumes partition0 and partition1.
By the same calculation C1 gets [P(2) ~ P(3)], but since there are only three partitions, C1 consumes only partition2.
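
The same arithmetic as a small shell sketch (a standalone illustration of the range formula above, not Kafka code; the partition and consumer counts are the example values):

partitions=3; consumers=2
M=$(( (partitions + consumers - 1) / consumers ))   # ceil(partitions / consumers)
for i in $(seq 0 $((consumers - 1))); do
    start=$(( M * i ))
    end=$(( (i + 1) * M - 1 ))
    (( end >= partitions )) && end=$(( partitions - 1 ))
    if (( start >= partitions )); then
        echo "C$i -> no partitions"
    else
        echo "C$i -> partitions $start..$end"
    fi
done
# prints: C0 -> partitions 0..1, C1 -> partitions 2..2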
