[root@tuiguang master-slave]# tree -L 2
├── conf
│ ├── alertmanager.yml
│ ├── master.conf
│ ├── mysql.yml
│ ├── prometheus-webhook-dingtalk.yml
│ ├── prometheus.yml
│ └── slave.conf
├── data
│ ├── mysql-master
│ └── mysql-slave
├── prometheus.yml
└── template
    └── default.tmpl
# Create the directory layout used by the containers in these notes.
mkdir -p /data/master-slave/conf   # configuration files
mkdir -p /data/master-slave/data   # persistent MySQL data

# Create the MySQL configuration files.
# They are named *.conf (not *.cnf) because the docker run / docker-compose
# volume mounts reference master.conf and slave.conf.
cd /data/master-slave/conf

# NOTE(review): the original heredoc bodies were lost when this note was
# pasted. The settings below are an assumed minimal replication setup
# (unique server-id per node, binary log on the master) -- replace them
# with the real configuration before use.
cat > master.conf <<'EOF'
[mysqld]
server-id=1
log-bin=mysql-bin
EOF

cat > slave.conf <<'EOF'
[mysqld]
server-id=2
EOF
- 方法一,直接run
# Master: data dir and my.cnf are bind-mounted from the host; MySQL is
# published on host port 3001.
docker run -d \
  --name mysql-master \
  -p 3001:3306 \
  -e MYSQL_ROOT_PASSWORD=123456 \
  -v /data/master-slave/data/mysql-master:/var/lib/mysql \
  -v /data/master-slave/conf/master.conf:/etc/my.cnf \
  mysql:8.0

# Slave: same image and layout, published on host port 3002.
docker run -d \
  --name mysql-slave \
  -p 3002:3306 \
  -e MYSQL_ROOT_PASSWORD=123456 \
  -v /data/master-slave/data/mysql-slave:/var/lib/mysql \
  -v /data/master-slave/conf/slave.conf:/etc/my.cnf \
  mysql:8.0
- 方法二、docker-compose
# docker-compose definition of the MySQL master/slave pair.
#
# Service keys must be unique: with two services both named `mysql`, the
# second mapping replaces the first and only the slave would be created.
version: '3'
services:
  # Replication source, published on host port 3001.
  mysql-master:
    image: "mysql:8.0"
    container_name: mysql-master
    restart: always
    network_mode: "bridge"
    environment:
      MYSQL_ROOT_PASSWORD: "123456"
    volumes:
      - "/data/master-slave/data/mysql-master:/var/lib/mysql"
      - "/data/master-slave/conf/master.conf:/etc/my.cnf"
    ports:
      - "3001:3306"

  # Replica, published on host port 3002.
  mysql-slave:
    image: "mysql:8.0"
    container_name: mysql-slave
    restart: always
    network_mode: "bridge"
    environment:
      MYSQL_ROOT_PASSWORD: "123456"
    volumes:
      - "/data/master-slave/data/mysql-slave:/var/lib/mysql"
      - "/data/master-slave/conf/slave.conf:/etc/my.cnf"
    ports:
      - "3002:3306"
- mysql.yml
# Prometheus alerting rules for MySQL, evaluated on mysqld_exporter metrics.
# Every rule must hold for 1 minute before it fires.
groups:
  - name: MySQLStatsAlert
    rules:
      # ---- Availability -------------------------------------------------
      - alert: MySQL是关闭的
        expr: mysql_up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "实例 {{ $labels.instance }} MySQL是关闭的"
          description: "MySQL数据库关闭。这需要立即采取行动!"

      # ---- Resource limits and buffer sizing ----------------------------
      - alert: 打开文件数
        expr: mysql_global_status_innodb_num_open_files > (mysql_global_variables_open_files_limit) * 0.75
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "实例 {{ $labels.instance }} 打开文件数过高"
          description: "打开文件数过高。请考虑增加open_files_limit."

      - alert: 读取缓冲区的大小大于最大值,允许数据包大小
        expr: mysql_global_variables_read_buffer_size > mysql_global_variables_slave_max_allowed_packet
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "实例 {{ $labels.instance }} 读缓冲区大小(read_buffer_size)大于max。允许的数据包大小(max_allowed_packet)"
          description: "读缓冲区大小(read_buffer_size)大于max。允许的数据包大小(max_allowed_packet)。这可能会破坏复制."

      # Both bounds check sort_buffer_size, the variable the description
      # talks about (256 KiB .. 4 MiB). The previous expression compared
      # innodb_sort_buffer_size and read_buffer_size instead.
      - alert: 排序缓冲区可能错过配置
        expr: mysql_global_variables_sort_buffer_size < 256*1024 or mysql_global_variables_sort_buffer_size > 4*1024*1024
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "实例 {{ $labels.instance }} 排序缓冲区大小不是太大就是太小"
          description: "排序缓冲区大小不是太大就是太小。sort_buffer_size的一个较好的值是256k到4M之间."

      # Hard lower bound (192 KiB); a second, softer thread_stack rule with
      # a 256 KiB threshold exists further down.
      - alert: 线程堆栈大小太小
        expr: mysql_global_variables_thread_stack < 196608
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "实例 {{ $labels.instance }} 线程堆栈大小太小"
          description: "线程堆栈大小太小。例如,当您使用存储语言构造时,这可能会导致问题。thread_stack_size的典型值是256k."

      - alert: 使用了超过80%的最大连接限制
        expr: mysql_global_status_max_used_connections > mysql_global_variables_max_connections * 0.8
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "实例 {{ $labels.instance }} 使用了超过80%的最大连接限制"
          description: "使用了超过80%的最大连接限制"

      # ---- InnoDB -------------------------------------------------------
      - alert: InnoDB 已启用强制恢复
        expr: mysql_global_variables_innodb_force_recovery != 0
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "实例 {{ $labels.instance }} 已启用InnoDB 强制恢复"
          description: "已启用InnoDB 强制恢复,这种模式只能用于数据恢复目的,它禁止写入数据."

      # Threshold is 16 MiB. (Summary previously read "nnoDB"; typo fixed.)
      - alert: InnoDB日志文件太小
        expr: mysql_global_variables_innodb_log_file_size < 16777216
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "实例 {{ $labels.instance }} InnoDB日志文件大小可能太小"
          description: "InnoDB日志文件大小可能太小,选择较小的InnoDB日志文件大小会对性能产生重大影响."

      - alert: InnoDB事务提交时的Flush日志
        expr: mysql_global_variables_innodb_flush_log_at_trx_commit != 1
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "实例 {{ $labels.instance }} InnoDB事务提交时的Flush日志设置为!= 1"
          description: "InnoDB事务提交时的Flush日志设置为!= 1。这可能导致在电源故障时丢失已提交的事务."

      # ---- Table caches -------------------------------------------------
      - alert: 表定义缓存太小
        expr: mysql_global_status_open_table_definitions > mysql_global_variables_table_definition_cache
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "实例 {{ $labels.instance }} 您的表定义缓存可能太小"
          description: "您的表定义缓存可能太小。如果它太小,则会对性能产生重大影响!"

      - alert: 表打开缓存太小
        expr: mysql_global_status_open_tables > mysql_global_variables_table_open_cache * 99/100
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "实例 {{ $labels.instance }} 您的表打开缓存可能太小(旧名称表缓存)"
          description: "您的表打开缓存可能太小(旧名称表缓存)。如果它太小,则会对性能产生重大影响!"

      # Softer thread_stack check: below 256 KiB.
      - alert: 线程堆栈大小可能太小
        expr: mysql_global_variables_thread_stack < 262144
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "实例 {{ $labels.instance }} 线程堆栈大小可能太小"
          description: "线程堆栈大小可能太小,例如,当您使用存储语言构造时,这可能会导致问题,thread_stack_size的典型值是256k."

      - alert: InnoDB缓冲池实例太小
        expr: mysql_global_variables_innodb_buffer_pool_instances == 1
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "实例 {{ $labels.instance }} InnoDB缓冲池实例太小"
          description: "如果你使用的是MySQL 5.5或更高版本,你应该使用几个InnoDB缓冲池实例来提高性能。InnoDB缓冲池实例的大小至少为1G,InnoDB缓冲池实例,你可以设置等于你的机器的核数."

      - alert: InnoDB 插件已启用
        expr: mysql_global_variables_ignore_builtin_innodb == 1
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "实例 {{ $labels.instance }} InnoDB 插件已启用"
          description: "InnoDB 插件已启用"

      # ---- Binary log ---------------------------------------------------
      - alert: 禁用二进制日志
        expr: mysql_global_variables_log_bin != 1
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "实例 {{ $labels.instance }} 禁用二进制日志"
          description: "二进制日志被禁用。这就禁止您进行时间点恢复(PiTR)。"

      # Threshold is 1 MiB.
      - alert: Binlog缓存大小太小
        expr: mysql_global_variables_binlog_cache_size < 1048576
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "实例 {{ $labels.instance }} Binlog缓存大小太小"
          description: "Binlog缓存大小可能太小了。1M或更高的值是可以的."

      # Only fires for a non-zero value below 1 MiB.
      - alert: Binlog语句缓存大小太小
        expr: mysql_global_variables_binlog_stmt_cache_size < 1048576 and mysql_global_variables_binlog_stmt_cache_size > 0
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "实例 {{ $labels.instance }} Binlog语句缓存大小太小"
          description: "Binlog语句缓存大小可能太小了。1M或更高的值通常是可以的."

      - alert: Binlog同步已启用
        expr: mysql_global_variables_sync_binlog == 1
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "实例 {{ $labels.instance }} Binlog同步已启用"
          description: "Binlog同步已启用。这带来了更高的数据安全性,但以写入性能为代价."

      # ---- Replication --------------------------------------------------
      - alert: IO线程停止
        expr: mysql_slave_status_slave_io_running != 1
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "实例 {{ $labels.instance }} IO线程停止"
          description: "IO线程已停止。这通常是因为它无法连接到Master."

      - alert: SQL线程停止
        expr: mysql_slave_status_slave_sql_running == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "实例 {{ $labels.instance }} SQL线程停止"
          description: "SQL线程已停止。这通常是因为它不能应用从主程序接收到的SQL语句."

      # seconds_behind_master is a gauge, so compare its value directly.
      # rate() is only meaningful for counters and would measure how fast
      # the lag changes rather than the lag itself.
      - alert: Slave 落后于 Master
        expr: mysql_slave_status_seconds_behind_master > 30
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "实例 {{ $labels.instance }} Slave 落后于 Master"
          description: "Slave 落后于 Master. 请检查从线程是否正在运行,以及是否有一些性能问题!"

      # Fires when read_only is OFF (== 0). The previous `!= 0` fired when
      # the slave WAS read-only, the opposite of what the alert announces.
      # The `and on (instance)` clause limits the rule to instances that
      # export slave status, so a writable master does not trigger it.
      - alert: Slave不是只读的
        expr: (mysql_global_variables_read_only == 0) and on (instance) mysql_slave_status_slave_io_running
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "实例 {{ $labels.instance }} Slave不是只读的"
          description: "Slave没有设置为只读。你可能会不小心操纵从服务器上的数据,得到不一致的结果……"
- prometheus.yml
# my global config
global:
  scrape_interval: 15s  # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s  # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration.
# The target list must not be empty, otherwise fired alerts are never
# delivered. 3007 is the host port published for the alertmanager
# container in the docker-compose file of these notes.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - '192.168.10.88:3007'

# Alerting rules, mounted into the container from conf/mysql.yml.
rule_files:
  - /etc/prometheus/mysql.yml

scrape_configs:
  # The job name is added as a label `job=` to any timeseries scraped from this config.
  - job_name: 'mysql_monitor'
    static_configs:
      # Alternative by container name (left disabled in the original):
      # - targets: ['mysqld-exporter-master:9104','mysqld-exporter-slave:9104']
      # The exporters are scraped through their published host ports.
      - targets: ['192.168.10.88:3003', '192.168.10.88:3004']
- alertmanager.yml
# Routing: every alert goes to the single receiver below, grouped by
# alert name.
route:
  receiver: webhook1
  group_by:
    - alertname
  group_wait: 1s
  group_interval: 1m
  repeat_interval: 4h

# Receiver: forwards notifications (including resolved ones) to the
# prometheus-webhook-dingtalk endpoint for target `webhook1`.
receivers:
  - name: webhook1
    webhook_configs:
      - send_resolved: true
        url: http://192.168.10.88:3008/dingtalk/webhook1/send

# Inhibition: a critical alert mutes a warning alert when the listed
# labels are equal on both.
inhibit_rules:
  - source_match:
      severity: critical
    target_match:
      severity: warning
    equal:
      - alertname
      - dev
      - instance
- prometheus-webhook-dingtalk.yml
## Request timeout
# timeout: 5s

## Customizable templates path
templates:
  - /etc/prometheus-webhook-dingtalk/template/default.tmpl

## DingTalk robot targets.
## SECURITY: never publish or commit a real robot access token or signing
## secret. Replace the <YOUR_...> placeholders with your own values in a
## private copy of this file.
targets:
  webhook1:
    url: "https://oapi.dingtalk.com/robot/send?access_token=<YOUR_ACCESS_TOKEN>"
    # secret for signature
    secret: "<YOUR_SIGNING_SECRET>"
  webhook2:
    url: "https://oapi.dingtalk.com/robot/send?access_token=<YOUR_ACCESS_TOKEN>"
  webhook_legacy:
    url: "https://oapi.dingtalk.com/robot/send?access_token=<YOUR_ACCESS_TOKEN>"
    # Customize template content
    message:
      # Use legacy template
      title: '{{ template "legacy.title" . }}'
      text: '{{ template "legacy.content" . }}'
  webhook_mention_all:
    url: "https://oapi.dingtalk.com/robot/send?access_token=<YOUR_ACCESS_TOKEN>"
    mention:
      all: true
  webhook_mention_users:
    url: "https://oapi.dingtalk.com/robot/send?access_token=<YOUR_ACCESS_TOKEN>"
    mention:
      mobiles: ['156xxxx8827', '189xxxx8325']
- default.tmpl
{{/* __subject: renders "[STATUS]" (upper-cased; with ":<number of firing alerts>" appended while the status is "firing"), then the group label values joined by spaces, and, when there are more common labels than group labels, the values of the remaining common labels in parentheses. */}}
{{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }}
{{/* __alertmanagerURL: emits a fixed link target (the space before "end" is part of the output). NOTE(review): in the docker-compose file of these notes host port 3005 is published for the prometheus container and 3007 for alertmanager, so this link appears to open the Prometheus alerts page - confirm that is intended. */}}
{{ define "__alertmanagerURL" }}http://192.168.10.88:3005/alerts {{ end }}
{{/* __text_alert_list: for each alert in the list passed as ".", prints its labels and annotations as quoted markdown bullet lists (sorted pairs), followed by a link to the alert's GeneratorURL. */}}
{{ define "__text_alert_list" }}{{ range . }}
**Labels**
{{ range .Labels.SortedPairs }}> - {{ .Name }}: {{ .Value | markdown | html }}
{{ end }}
**Annotations**
{{ range .Annotations.SortedPairs }}> - {{ .Name }}: {{ .Value | markdown | html }}
{{ end }}
**Source:** [{{ .GeneratorURL }}]({{ .GeneratorURL }})
{{ end }}{{ end }}
{{/* default.__text_alert_list: for each alert in the list passed as ".", prints a "---" separator and then status, the severity / alertname / instance labels, the summary / description annotations, and StartsAt formatted with dateInZone for the Asia/Shanghai zone. */}}
{{ define "default.__text_alert_list" }}{{ range . }}
---
【告警状态】:{{ .Status }}
【告警级别】:{{ .Labels.severity }}
【告警类型】:{{ .Labels.alertname }}
【告警应用】:{{ .Annotations.summary }}
【告警主机】:{{ .Labels.instance }}
【告警详情】:{{ .Annotations.description }}
【告警时间】:{{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }}
{{ end }}
{{ end }}
{{/* default.__text_alertresolve_list: same per-alert fields as default.__text_alert_list, but prints both StartsAt and EndsAt (each formatted with dateInZone for the Asia/Shanghai zone). */}}
{{ define "default.__text_alertresolve_list" }}{{ range . }}
---
【告警状态】: {{ .Status }}
【告警级别】: {{ .Labels.severity }}
【告警类型】: {{ .Labels.alertname }}
【告警应用】: {{ .Annotations.summary }}
【告警主机】: {{ .Labels.instance }}
【告警详情】: {{ .Annotations.description }}
【触发时间】: {{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }}
【结束时间】: {{ dateInZone "2006.01.02 15:04:05" (.EndsAt) "Asia/Shanghai" }}
{{ end }}
{{ end }}
{{/* Default templates. default.title reuses __subject. default.content prints a "####" heading with the status (plus the firing count while firing) and the alertname group label linked to __alertmanagerURL; then, if any alerts are firing, a banner, default.__text_alert_list over .Alerts.Firing and a dashed rule; then, if any alerts are resolved, a banner and default.__text_alertresolve_list over .Alerts.Resolved. */}}
{{ define "default.title" }}{{ template "__subject" . }}{{ end }}
{{ define "default.content" }}#### \[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}\] **[{{ index .GroupLabels "alertname" }}]({{ template "__alertmanagerURL" . }})**
{{ if gt (len .Alerts.Firing) 0 -}}
**=====好家伙,来活了~~~=====**
{{ template "default.__text_alert_list" .Alerts.Firing }}
---------------------------------------------------------
{{- end }}
{{ if gt (len .Alerts.Resolved) 0 -}}
**=====舒服了~~~ =====**
{{ template "default.__text_alertresolve_list" .Alerts.Resolved }}
{{- end }}
{{- end }}
{{/* Legacy templates. legacy.title reuses __subject. legacy.content prints the same "####" heading as default.content, followed by __text_alert_list over the firing alerts only. */}}
{{ define "legacy.title" }}{{ template "__subject" . }}{{ end }}
{{ define "legacy.content" }}#### \[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}\] **[{{ index .GroupLabels "alertname" }}]({{ template "__alertmanagerURL" . }})**
{{ template "__text_alert_list" .Alerts.Firing }}
{{- end }}
{{/* Following names are kept for compatibility: ding.link.title and ding.link.content simply delegate to default.title and default.content. */}}
{{ define "ding.link.title" }}{{ template "default.title" . }}{{ end }}
{{ define "ding.link.content" }}{{ template "default.content" . }}{{ end }}
mysqld_exporter
-- Monitoring account referenced by the exporter's DATA_SOURCE_NAME in these
-- notes. It may connect from any host and is capped at 3 concurrent
-- connections.
CREATE USER 'mysql_monitor'@'%'
  IDENTIFIED BY 'mysql_monitor'
  WITH MAX_USER_CONNECTIONS 3;

-- Global privileges granted to the monitoring account.
GRANT PROCESS, REPLICATION CLIENT, SELECT
  ON *.*
  TO 'mysql_monitor'@'%';

COMMIT;
FLUSH PRIVILEGES;
# Standalone mysqld_exporter, published on host port 3003.
# The image is pinned to v0.14.0: the untagged image resolves to :latest, and
# mysqld_exporter dropped support for the DATA_SOURCE_NAME environment
# variable in v0.15.0, so this command would no longer connect.
# NOTE(review): this DSN targets port 3002 (the slave), while the compose file
# in these notes maps host port 3003 to the master exporter -- adjust as needed.
docker run -d \
  --name mysqld_exporter \
  -p 3003:9104 \
  -e DATA_SOURCE_NAME="mysql_monitor:mysql_monitor@(192.168.10.88:3002)/" \
  prom/mysqld-exporter:v0.14.0
# docker-compose definition of the monitoring stack:
# two mysqld exporters, Prometheus, Alertmanager, the DingTalk webhook
# bridge and Grafana. Every service is published on a 300x host port.
version: '3'
services:
  # Exporter for the master (MySQL on host port 3001).
  # Pinned to v0.14.0 because mysqld_exporter removed DATA_SOURCE_NAME
  # support in v0.15.0; the untagged image would pull a newer release.
  mysqld-exporter-master:
    network_mode: "bridge"
    environment:
      DATA_SOURCE_NAME: "mysql_monitor:mysql_monitor@(192.168.10.88:3001)/"
    image: "prom/mysqld-exporter:v0.14.0"
    container_name: mysqld-exporter-master
    restart: always
    ports:
      - "3003:9104"

  # Exporter for the slave (MySQL on host port 3002).
  mysqld-exporter-slave:
    depends_on:
      - mysqld-exporter-master
    network_mode: "bridge"
    environment:
      DATA_SOURCE_NAME: "mysql_monitor:mysql_monitor@(192.168.10.88:3002)/"
    image: "prom/mysqld-exporter:v0.14.0"
    container_name: mysqld-exporter-slave
    restart: always
    ports:
      - "3004:9104"

  # Prometheus server; scrape config and alert rules are bind-mounted.
  prometheus:
    depends_on:
      - mysqld-exporter-master
      - mysqld-exporter-slave
    network_mode: "bridge"
    environment:
      - TZ=Asia/Shanghai
    image: "prom/prometheus:latest"
    container_name: prometheus
    restart: always
    volumes:
      - "/data/master-slave/conf/mysql.yml:/etc/prometheus/mysql.yml"
      - "/data/master-slave/conf/prometheus.yml:/etc/prometheus/prometheus.yml"
      - "/etc/localtime:/etc/localtime:ro"
    ports:
      - "3005:9090"

  # Alertmanager; receives alerts from Prometheus and calls the webhook.
  alertmanager:
    depends_on:
      - prometheus
    network_mode: "bridge"
    image: "prom/alertmanager:latest"
    container_name: alertmanager
    restart: always
    volumes:
      - "/data/master-slave/conf/alertmanager.yml:/etc/alertmanager/alertmanager.yml"
    ports:
      - "3007:9093"

  # DingTalk webhook bridge. It listens on 8060 inside the container by
  # default (9093 is Alertmanager's port), so host port 3008 must map to
  # 8060 for the URL in alertmanager.yml to reach it.
  prometheus-webhook-dingtalk:
    depends_on:
      - prometheus
    network_mode: "bridge"
    image: "timonwong/prometheus-webhook-dingtalk:latest"
    container_name: prometheus-webhook-dingtalk
    restart: always
    volumes:
      - "/data/master-slave/conf/prometheus-webhook-dingtalk.yml:/etc/prometheus-webhook-dingtalk/config.yml"
      - "/data/master-slave/template:/etc/prometheus-webhook-dingtalk/template"
    ports:
      - "3008:8060"

  # Grafana dashboards.
  grafana:
    depends_on:
      - prometheus
    network_mode: "bridge"
    image: "grafana/grafana:latest"
    container_name: grafana
    restart: always
    ports:
      - "3006:3000"