参考:https://docs.docker.com/config/daemon/prometheus/
https://github.com/cyancow/swarmprom
按照github开源项目来进行配置:https://github.com/cyancow/swarmprom
$ git clone https://github.com/stefanprodan/swarmprom.git
$ cd swarmprom
修改 Docker 默认配置文件,增加以下两项内容,以便 Prometheus 能够采集 Docker 守护进程的指标数据
Linux修改: /etc/docker/daemon.json
Windows Server修改: C:\ProgramData\docker\config\daemon.json
增加以下两项内容:如果文件中已经有其他配置,则在原有大括号内追加这两项;如果文件不存在,则新建文件。修改后需要重启 Docker 服务才能生效。文件内容如下:
{
  "metrics-addr": "127.0.0.1:9323",
  "experimental": true
}
修改网络及相关端口映射:将原配置文件中的网络 net 修改为自有集群中的网络,这样现有集群中处于该网络内的服务就可以被检测到;
# Docker Swarm monitoring stack (swarmprom): Prometheus, Grafana, Alertmanager,
# unsee, cAdvisor, node-exporter and dockerd-exporter, fronted by Caddy.
# Intended for `docker stack deploy -c docker-compose.yml mon`.
version: "3.3"

# Original network configuration:
# networks:
#   net:
#     driver: overlay
#     attachable: true
#
# Switched to the cluster's own network.
# NOTE(review): declared like this, `docker stack deploy` creates a new overlay
# network whose name is prefixed with the stack name. If `mon_net` is meant to
# be a network that already exists in the swarm, it probably needs
# `external: true` instead of `driver`/`attachable` - confirm on the cluster.
networks:
  mon_net:
    driver: overlay
    attachable: true

# Named volumes that persist data for the stateful services.
volumes:
  prometheus: {}
  grafana: {}
  alertmanager: {}

# Swarm configs loaded from files that sit next to this compose file.
configs:
  caddy_config:
    file: ./caddy/Caddyfile
  dockerd_config:
    file: ./dockerd-exporter/Caddyfile
  node_rules:
    file: ./prometheus/rules/swarm_node.rules.yml
  task_rules:
    file: ./prometheus/rules/swarm_task.rules.yml

services:
  # Docker daemon metrics collector; runs on every node (global mode).
  # NOTE(review): presumably the mounted Caddyfile proxies to
  # DOCKER_GWBRIDGE_IP:9323. If so, the daemon's `metrics-addr` must listen on
  # an address reachable from containers (not only 127.0.0.1) - verify.
  dockerd-exporter:
    image: stefanprodan/caddy
    networks:
      - mon_net
    environment:
      - DOCKER_GWBRIDGE_IP=172.18.0.1
    configs:
      - source: dockerd_config
        target: /etc/caddy/Caddyfile
    deploy:
      mode: global
      resources:
        limits:
          memory: 128M
        reservations:
          memory: 64M

  # Container metrics collector; runs on every node and reads host paths.
  cadvisor:
    image: google/cadvisor
    networks:
      - mon_net
    command: -logtostderr -docker_only
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /:/rootfs:ro
      - /var/run:/var/run
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    deploy:
      mode: global
      resources:
        limits:
          memory: 128M
        reservations:
          memory: 64M

  # Dashboards. Admin credentials come from ADMIN_USER / ADMIN_PASSWORD and
  # fall back to admin/admin when unset - override them outside of testing.
  grafana:
    image: stefanprodan/swarmprom-grafana:5.3.4
    networks:
      - mon_net
    environment:
      - GF_SECURITY_ADMIN_USER=${ADMIN_USER:-admin}
      - GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin}
      - GF_USERS_ALLOW_SIGN_UP=false
      # Optional settings, disabled by default:
      #- GF_SERVER_ROOT_URL=${GF_SERVER_ROOT_URL:-localhost}
      #- GF_SMTP_ENABLED=${GF_SMTP_ENABLED:-false}
      #- GF_SMTP_FROM_ADDRESS=${GF_SMTP_FROM_ADDRESS:-grafana@test.com}
      #- GF_SMTP_FROM_NAME=${GF_SMTP_FROM_NAME:-Grafana}
      #- GF_SMTP_HOST=${GF_SMTP_HOST:-smtp:25}
      #- GF_SMTP_USER=${GF_SMTP_USER}
      #- GF_SMTP_PASSWORD=${GF_SMTP_PASSWORD}
    volumes:
      - grafana:/var/lib/grafana
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.role == manager
      resources:
        limits:
          memory: 128M
        reservations:
          memory: 64M

  # Alert dispatcher. The Slack settings use the placeholder defaults below
  # when the corresponding environment variables are not provided.
  alertmanager:
    image: stefanprodan/swarmprom-alertmanager:v0.14.0
    networks:
      - mon_net
    environment:
      - SLACK_URL=${SLACK_URL:-https://hooks.slack.com/services/TOKEN}
      - SLACK_CHANNEL=${SLACK_CHANNEL:-general}
      - SLACK_USER=${SLACK_USER:-alertmanager}
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
    volumes:
      - alertmanager:/alertmanager
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.role == manager
      resources:
        limits:
          memory: 128M
        reservations:
          memory: 64M

  # Alertmanager dashboard; talks to the alertmanager service on port 9093.
  unsee:
    image: cloudflare/unsee:v0.8.0
    networks:
      - mon_net
    environment:
      - "ALERTMANAGER_URIS=default:http://alertmanager:9093"
    deploy:
      mode: replicated
      replicas: 1

  # Host metrics collector; runs on every node with host /proc, /sys and /
  # mounted read-only.
  node-exporter:
    image: stefanprodan/swarmprom-node-exporter:v0.16.0
    networks:
      - mon_net
    environment:
      # Swarm service template: expands to the ID of the node running the task.
      - NODE_ID={{.Node.ID}}
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
      - /etc/hostname:/etc/nodename
    command:
      - '--path.sysfs=/host/sys'
      - '--path.procfs=/host/proc'
      - '--collector.textfile.directory=/etc/node-exporter/'
      # `$$` is the Compose escape for a literal `$` in the regex.
      - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)'
      - '--no-collector.ipvs'
    deploy:
      mode: global
      resources:
        limits:
          memory: 128M
        reservations:
          memory: 64M

  # Metrics database. Retention comes from PROMETHEUS_RETENTION (default 24h).
  prometheus:
    image: stefanprodan/swarmprom-prometheus:v2.5.0
    networks:
      - mon_net
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention=${PROMETHEUS_RETENTION:-24h}'
    volumes:
      - prometheus:/prometheus
    configs:
      - source: node_rules
        target: /etc/prometheus/swarm_node.rules.yml
      - source: task_rules
        target: /etc/prometheus/swarm_task.rules.yml
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.role == manager
      resources:
        limits:
          memory: 2048M
        reservations:
          memory: 128M

  # Reverse proxy / basic-auth front end; the only service that publishes
  # ports. ADMIN_USER / ADMIN_PASSWORD default to admin/admin when unset.
  caddy:
    image: stefanprodan/caddy
    ports:
      - "3000:3000"
      - "9090:9090"
      - "9093:9093"
      - "9094:9094"
    networks:
      - mon_net
    environment:
      - ADMIN_USER=${ADMIN_USER:-admin}
      - ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin}
    configs:
      - source: caddy_config
        target: /etc/caddy/Caddyfile
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.role == manager
      resources:
        limits:
          memory: 128M
        reservations:
          memory: 64M
    # Healthy once the proxied endpoint on port 3000 answers successfully.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3000"]
      interval: 5s
      timeout: 1s
      retries: 5
启动命令相比 GitHub 上的示例做了修改:暂时不添加 Slack 报警通知,可以后续再加,而且报警方式有很多种,不一定选择 Slack。这里启动命令修改为:
ADMIN_USER=admin ADMIN_PASSWORD=admin docker stack deploy -c docker-compose.yml mon
查看服务
[root@elcndc2zhda01 swarmprom]# docker service ls | grep mon
cympto586gtr mon_alertmanager replicated 1/1 stefanprodan/swarmprom-alertmanager:v0.14.0
ksg0ysst4zm6 mon_caddy replicated 1/1 stefanprodan/caddy:latest *:9090->9090/tcp, *:9093-9094->9093-9094/tcp, *:10009->3000/tcp
ym4kz7ske9fy mon_cadvisor global 2/3 google/cadvisor:latest
ylfwmwttjek7 mon_dockerd-exporter global 3/3 stefanprodan/caddy:latest
mqpoup34kjn9 mon_grafana replicated 1/1 stefanprodan/swarmprom-grafana:5.3.4
inu71tv3lp0o mon_node-exporter global 3/3 stefanprodan/swarmprom-node-exporter:v0.16.0
sisf76m0vnr7 mon_prometheus replicated 1/1 stefanprodan/swarmprom-prometheus:v2.5.0
pkww94r8rtm1 mon_unsee replicated 1/1 cloudflare/unsee:v0.8.0
[root@elcndc2zhda01 swarmprom]#
检查 Prometheus、caddy、grafana 的日志,确认都没有报错,则说明启动成功;
服务启动正常后,通过 caddy 服务的容器所在 node 节点的 IP 加端口号来访问服务;(这里没有专门配置域名)
相关服务对应访问端口对应关系:
Services:
prometheus (metrics database) http://<swarm-ip>:9090
grafana (visualize metrics) http://<swarm-ip>:3000
node-exporter (host metrics collector)
cadvisor (containers metrics collector)
dockerd-exporter (Docker daemon metrics collector, requires Docker experimental metrics-addr to be enabled)
alertmanager (alerts dispatcher) http://<swarm-ip>:9093
unsee (alert manager dashboard) http://<swarm-ip>:9094
caddy (reverse proxy and basic auth provider for prometheus, alertmanager and unsee)
主要用到的是 grafana:用来查看图表
http://<swarm-ip>:3000
Prometheus 的服务不常用,如果熟悉使用也可以登录:
http://<swarm-ip>:9090
上述服务的账号密码是启动集群时在命令中指定的 admin:admin
以上就已经完成了 Prometheus 监控组合的基础搭建;
打开设置->用户->邀请用户
填写用户信息,点invite邀请
查看邀请用户,并复制邀请链接
邀请链接会复制到剪切板,直接粘贴出来如下,是一个邀请地址,这里需要把 localhost 和端口替换为我们 grafana 的 IP 和端口后再进行访问
http://localhost:3000/invite/aZRgdHbJudW3RnOiZzcFQLi0kwOWKv
修改为
http://<grafana-ip>:3000/invite/aZRgdHbJudW3RnOiZzcFQLi0kwOWKv
打开链接后输入邮箱/密码点击注册,账号就可以用了