Building a highly available RabbitMQ cluster on Docker Swarm with Consul. RabbitMQ is open-source message broker software that implements the Advanced Message Queuing Protocol (AMQP) and primarily provides message queuing services. This article walks through building a highly available RabbitMQ cluster using Docker Compose manifests.
Swarm is Docker's official cluster-management tool. Its main job is to abstract a set of Docker hosts into a single whole and to manage all the Docker resources on those hosts through one entry point.
Swarm is similar to Kubernetes, but it is lighter-weight and offers fewer features than Kubernetes.
In this experiment we use Docker Swarm to build a highly available RabbitMQ cluster with Consul-based automatic peer discovery. An HAProxy server load-balances AMQP requests and improves the overall availability of the cluster.
Hostname | IP |
---|---|
D-master | 192.168.32.11 |
D-node-1 | 192.168.32.12 |
D-node-2 | 192.168.32.13 |
All three nodes run the same Docker version and operating system:
# docker --version
Docker version 18.09.9, build 039a7df9ba
# cat /etc/issue
Ubuntu 18.04.1 LTS \n \l
Initialize the swarm on the master:
root@D-master:~# docker swarm init --advertise-addr 192.168.32.11
Swarm initialized: current node (nt58bpwl6t31cuncok453d8bv) is now a manager.
To add a worker to this swarm, run the following command:
docker swarm join --token SWMTKN-1-2qdamb060dkj7y7fr080ywgfakxuuyncwjq19nsk14tbacu02l-2ebxvqnsvql3lu3xpybkzcpqu 192.168.32.11:2377
To add a manager to this swarm, run 'docker swarm join-token manager' and follow the instructions.
Join the two worker nodes to the swarm:
#node1:
root@D-node-1:~# docker swarm join --token SWMTKN-1-2qdamb060dkj7y7fr080ywgfakxuuyncwjq19nsk14tbacu02l-2ebxvqnsvql3lu3xpybkzcpqu 192.168.32.11:2377
This node joined a swarm as a worker.
#node2:
root@D-node-2:~# docker swarm join --token SWMTKN-1-2qdamb060dkj7y7fr080ywgfakxuuyncwjq19nsk14tbacu02l-2ebxvqnsvql3lu3xpybkzcpqu 192.168.32.11:2377
This node joined a swarm as a worker.
Check the cluster status on the master:
root@D-master:~# docker node ls
ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION
nt58bpwl6t31cuncok453d8bv * D-master Ready Active Leader 18.09.9
6e5sj1f8wzi6notn96hz9w3oe D-node-1 Ready Active 18.09.9
wfnlemwns5y8fv0i67jlx7atl D-node-2 Ready Active 18.09.9
root@D-master:~# docker node promote D-node-1    # promote node1
Node D-node-1 promoted to a manager in the swarm.
root@D-master:~# docker node promote D-node-2    # promote node2
Node D-node-2 promoted to a manager in the swarm.
# check each node's role
root@D-master:~# docker node inspect D-master | grep -i role
"Role": "manager",
root@D-master:~# docker node inspect D-node-1 | grep -i role
"Role": "manager",
root@D-master:~# docker node inspect D-node-2 | grep -i role
"Role": "manager",
root@D-master:~# for i in `docker node ls -q`;do docker node inspect $i | grep -i role ;done
"Role": "manager",
"Role": "manager",
"Role": "manager",
root@D-master:~# docker network create --driver=overlay --attachable prod    # create an attachable overlay network
g7vpwhorx9twyoxsyl1k994vj
root@D-master:~# docker network ls    # list networks
NETWORK ID NAME DRIVER SCOPE
7e60ca1d698e bridge bridge local
dxlvucnscnc7 consul_consul overlay swarm
5953313f3a26 docker_gwbridge bridge local
1ac0d262e7e3 host host local
fy418rdt19f1 ingress overlay swarm
2f3907cd86b4 none null local
g7vpwhorx9tw prod overlay swarm
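Before moving on, it is worth confirming that prod really is an attachable, swarm-scoped overlay:
root@D-master:~# docker network inspect prod -f '{{.Driver}} {{.Scope}} attachable={{.Attachable}}'    # expected output: overlay swarm attachable=true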
「Notes」
- Persist Consul's data to avoid data loss.
- Deploy the Consul service in global mode and control scheduling with node labels.
- Use two separate networks: one for internal Consul traffic, the other for communication between RabbitMQ and the Consul service.
root@D-master:~# mkdir consul_rabbitmq_docker
root@D-master:~# cd consul_rabbitmq_docker
root@D-master:~/consul_rabbitmq_docker# docker node update --label-add consul=true D-master
D-master
root@D-master:~# docker node update --label-add consul=true D-node-1
D-node-1
root@D-master:~/consul_rabbitmq_docker# docker node update --label-add consul=true D-node-2
D-node-2
# for i in `docker node ls -q`;do docker node inspect $i | grep -iA 5 label ;done    # check node labels
If a node label was defined incorrectly, delete the label first and then recreate it:
# delete a node label
# docker node update --label-rm <label-name> <hostname>
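To view just the labels on one node, docker node inspect also takes a format template, which avoids the grep pipeline above:
root@D-master:~# docker node inspect -f '{{ .Spec.Labels }}' D-master    # prints only the node's label map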
The following deployment manifest lets Swarm orchestrate the Consul service; it is launched with: docker stack deploy -c <file>.yml <stack-name>
root@D-master:~/consul_rabbitmq_docker# vim docker-compose_consul.yaml
version: '3.6'
services:
  consul:
    image: consul:1.4.0
    hostname: "{{.Node.Hostname}}"
    networks:
      - consul
      - prod
    ports:
      - 8400:8400
      - 8500:8500
      - 8600:53
    volumes:
      - consul-data:/consul/data
    deploy:
      mode: global
      placement:
        constraints: [node.labels.consul == true]
    command: [ "agent", "-server", "-bootstrap-expect=3", "-retry-max=3", "-retry-interval=10s", "-datacenter=prod", "-join=consul", "-retry-join=consul", "-bind={{ GetInterfaceIP \"eth0\" }}", "-client=0.0.0.0", "-ui"]
networks:
  consul:
  prod:
    external: true
volumes:
  consul-data:
root@D-master:~/consul_rabbitmq_docker# docker stack deploy -c docker-compose_consul.yaml consul
Creating network consul_consul
Creating service consul_consul
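After the stack is created, confirm that Swarm scheduled one Consul task per labeled node (global mode plus the node.labels.consul constraint should yield exactly three tasks):
root@D-master:~/consul_rabbitmq_docker# docker service ls    # consul_consul should report 3/3 replicas
root@D-master:~/consul_rabbitmq_docker# docker service ps consul_consul    # one running task on each of the three nodes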
# check node labels
root@D-master:~/consul_rabbitmq_docker# for i in `docker node ls -q`;do docker node inspect $i | grep -iC 5 label ;done
root@D-master:~# curl 192.168.32.11:8500/v1/status/leader
"10.0.2.47:8300"
root@D-master:~# curl 192.168.32.12:8500/v1/status/leader
"10.0.2.47:8300"
root@D-master:~# curl 192.168.32.13:8500/v1/status/leader
"10.0.2.47:8300"
root@D-master:~# curl 192.168.32.11:8500/v1/status/peers    # cluster peers
["10.0.2.47:8300","10.0.2.35:8300","10.0.2.49:8300"]
root@D-master:~# docker ps -a |grep consul
Open the Consul dashboard in a browser to verify that the installation succeeded:
http://192.168.32.11:8500/
「Notes」
- Persist the data to prevent data loss.
- Deploy the RabbitMQ services in global mode and control scheduling with node labels.
- Use the prod network for internal/external RabbitMQ communication.
- Do not expose RABBITMQ_ERLANG_COOKIE or RABBITMQ_DEFAULT_PASS.
- The hostname matters, because RabbitMQ uses it in its data directory path.
root@D-master:~/consul_rabbitmq_docker# vim docker-compose_rabbitmq.yml
root@D-master:~/consul_rabbitmq_docker# cat docker-compose_rabbitmq.yml
version: "3.6"
services:
rabbitmq-01:
image: olgac/rabbitmq:3.7.8-management
hostname: rabbitmq-01
environment:
- RABBITMQ_DEFAULT_USER=admin
- RABBITMQ_DEFAULT_PASS=Passw0rd
- RABBITMQ_ERLANG_COOKIE="MY-SECRET-KEY-123"
networks:
- prod
volumes:
- rabbitmq-01-data:/var/lib/rabbitmq
deploy:
mode: global
placement:
constraints: [node.labels.rabbitmq1 == true]
rabbitmq-02:
image: olgac/rabbitmq:3.7.8-management
hostname: rabbitmq-02
environment:
- RABBITMQ_DEFAULT_USER=admin
- RABBITMQ_DEFAULT_PASS=Passw0rd
- RABBITMQ_ERLANG_COOKIE="MY-SECRET-KEY-123"
networks:
- prod
volumes:
- rabbitmq-02-data:/var/lib/rabbitmq
deploy:
mode: global
placement:
constraints: [node.labels.rabbitmq2 == true]
rabbitmq-03:
image: olgac/rabbitmq:3.7.8-management
hostname: rabbitmq-03
environment:
- RABBITMQ_DEFAULT_USER=admin
- RABBITMQ_DEFAULT_PASS=Passw0rd
- RABBITMQ_ERLANG_COOKIE="MY-SECRET-KEY-123"
networks:
- prod
volumes:
- rabbitmq-03-data:/var/lib/rabbitmq
deploy:
mode: global
placement:
constraints: [node.labels.rabbitmq3 == true]
networks:
prod:
external: true
volumes:
rabbitmq-01-data:
rabbitmq-02-data:
rabbitmq-03-data:
# docker-compose config: validates the syntax of a deployment manifest (.yml file)
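For example, validate the manifest above before deploying (this assumes the standalone docker-compose binary is installed on the host):
root@D-master:~/consul_rabbitmq_docker# docker-compose -f docker-compose_rabbitmq.yml config    # prints the resolved config, or an error if the syntax is invalid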
If a label was defined incorrectly, delete the node label first and then create a new one:
# delete a node label:
root@D-master:~/consul_rabbitmq_docker# docker node update --label-rm rabbitmq1 D-node-1
D-node-1
### add the rabbitmq node labels
root@D-master:~/consul_rabbitmq_docker# docker node update --label-add rabbitmq1=true D-master
D-master
root@D-master:~/consul_rabbitmq_docker# docker node update --label-add rabbitmq2=true D-node-1
D-node-1
root@D-master:~/consul_rabbitmq_docker# docker node update --label-add rabbitmq3=true D-node-2
D-node-2
### check node labels:
root@D-master:~/consul_rabbitmq_docker# for i in `docker node ls -q`;do docker node inspect $i | grep -iA 3 label ;done
root@D-master:~/consul_rabbitmq_docker# docker stack deploy -c docker-compose_rabbitmq.yml rabbitmq
Creating service rabbitmq_rabbitmq-01
Creating service rabbitmq_rabbitmq-02
Creating service rabbitmq_rabbitmq-03
root@D-master:~# docker ps -a |grep rabbitmq
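With the three services running, verify that the brokers discovered each other through Consul and formed a single cluster. A quick sketch, assuming Swarm's default task naming so the name filter matches the rabbitmq-01 container:
root@D-master:~# docker exec $(docker ps -qf name=rabbitmq_rabbitmq-01) rabbitmqctl cluster_status    # the running_nodes list should contain all three rabbit@rabbitmq-0x nodes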
At this point Docker shows that the RabbitMQ cluster is up, but the compose file does not expose any RabbitMQ ports, so the web UI cannot be reached yet. For security, we deploy an HAProxy instance on the same Docker service network as RabbitMQ: HAProxy not only handles load-balancing of requests, it is also the only service that exposes RabbitMQ's ports, so the MQ cluster is accessed through it.
root@D-master:~# docker ps    # find the container ID
root@D-master:~# docker exec -it 5f735718d62f /bin/bash    # open an interactive shell inside the container
root@rabbitmq-01:/# find / -name "*plugins*"
root@rabbitmq-01:/# cat /etc/rabbitmq/enabled_plugins
[rabbitmq_management,
rabbitmq_peer_discovery_consul,
rabbitmq_federation,
rabbitmq_federation_management,
rabbitmq_shovel,
rabbitmq_shovel_management].
root@rabbitmq-01:/# find / -name "*rabbitmq.conf*"
root@rabbitmq-01:/# cat /etc/rabbitmq/rabbitmq.conf
loopback_users.sa = false
cluster_formation.peer_discovery_backend = rabbit_peer_discovery_consul
cluster_formation.consul.host = consul
cluster_formation.node_cleanup.only_log_warning = true
cluster_formation.consul.svc_addr_auto = true
cluster_partition_handling = autoheal
vm_memory_high_watermark.relative = 0.8
disk_free_limit.absolute = 5GB
loopback_users.guest = false
listeners.tcp.default = 5672
default_pass = Passw0rd
default_user = admin
hipe_compile = false
management.listener.port = 15672
management.listener.ssl = false
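The cluster_formation.consul.* settings above also make each broker register itself in Consul. By default the peer-discovery plugin registers under the service name rabbitmq, so the registration can be checked through the catalog API (a sketch assuming that default name):
root@D-master:~# curl 192.168.32.11:8500/v1/catalog/service/rabbitmq    # should return one entry per broker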
root@D-master:~/consul_rabbitmq_docker# vim docker-compose_haproxy.yml
version: "3.6"
services:
haproxy:
image: olgac/haproxy-for-rabbitmq:1.8.14-alpine
ports:
- 15672:15672
- 5672:5672
- 1936:1936
networks:
- prod
deploy:
mode: global
networks:
prod:
external: true
root@D-master:~/consul_rabbitmq_docker# docker stack deploy -c docker-compose_haproxy.yml haproxy
Creating service haproxy_haproxy
root@D-master:~/consul_rabbitmq_docker# ss -ntlp |grep 1936
root@D-master:~/consul_rabbitmq_docker# ps -ef |grep haproxy
root 13380 13348 0 14:01 ? 00:00:00 haproxy -W -db -f /usr/local/etc/haproxy/haproxy.cfg
root 13477 13380 0 14:01 ? 00:00:00 haproxy -W -db -f /usr/local/etc/haproxy/haproxy.cfg
root 13574 9914 0 14:01 pts/0 00:00:00 grep --color=auto haproxy
root@D-master:~# docker ps
root@D-master:~# docker exec -it 23b0dbed70ae /bin/sh
/ #
/ # find / -name "*haproxy.cfg*"
/usr/local/etc/haproxy/haproxy.cfg
/ #
/ # cat /usr/local/etc/haproxy/haproxy.cfg
global
    log 127.0.0.1 local0
    log 127.0.0.1 local1 notice
    maxconn 4096

defaults
    log global
    option tcplog
    option dontlognull
    timeout connect 6s
    timeout client 60s
    timeout server 60s

listen stats
    bind *:1936
    mode http
    stats enable
    stats hide-version
    stats realm Haproxy\ Statistics
    stats uri /

listen rabbitmq
    bind *:5672
    mode tcp
    server rabbitmq-01 rabbitmq-01:5672 check
    server rabbitmq-02 rabbitmq-02:5672 check
    server rabbitmq-03 rabbitmq-03:5672 check

listen rabbitmq-ui
    bind *:15672
    mode http
    server rabbitmq-01 rabbitmq-01:15672 check
    server rabbitmq-02 rabbitmq-02:15672 check
    server rabbitmq-03 rabbitmq-03:15672 check
/ #
Visit http://IP:15672 to reach RabbitMQ's management console; the credentials (admin/Passw0rd) are the ones defined in the .yml file earlier.
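As a final end-to-end check, query the management API through HAProxy and look at the HAProxy stats page, where all three backends should be UP (a sketch against the addresses used above):
root@D-master:~# curl -u admin:Passw0rd http://192.168.32.11:15672/api/overview    # management API reached via HAProxy on port 15672
root@D-master:~# curl http://192.168.32.11:1936/    # HAProxy statistics page defined in the listen stats section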