mysql主从+prometheus+grafana

[root@tuiguang master-slave]# tree -L 2
├── conf
│   ├── alertmanager.yml
│   ├── master.conf
│   ├── mysql.yml
│   ├── prometheus-webhook-dingtalk.yml
│   ├── prometheus.yml
│   └── slave.conf
├── data
│   ├── mysql-master
│   └── mysql-slave
├── prometheus.yml
└── template
    └── default.tmpl

# Create the directory that holds the configuration files.
mkdir -p /data/master-slave/conf
# Create the directory that holds the persistent MySQL data.
mkdir -p /data/master-slave/data
# Create the MySQL config files. They are named *.conf because that is the
# file name mounted into the containers as /etc/my.cnf.
cd /data/master-slave/conf
# NOTE(review): the original heredoc bodies were lost from this document; the
# settings below are only a minimal master/slave starting point — confirm
# them against your environment before use.
cat > master.conf <<'EOF'
[mysqld]
server-id=1
log-bin=mysql-bin
EOF
cat > slave.conf <<'EOF'
[mysqld]
server-id=2
relay-log=relay-bin
read-only=1
EOF
  • 方法一:直接 docker run
# Start the master: data and config are bind-mounted, host port 3001 -> 3306.
docker run -d \
  --name mysql-master \
  -p 3001:3306 \
  -e MYSQL_ROOT_PASSWORD=123456 \
  -v /data/master-slave/data/mysql-master:/var/lib/mysql \
  -v /data/master-slave/conf/master.conf:/etc/my.cnf \
  mysql:8.0
# Start the slave the same way, on host port 3002.
docker run -d \
  --name mysql-slave \
  -p 3002:3306 \
  -e MYSQL_ROOT_PASSWORD=123456 \
  -v /data/master-slave/data/mysql-slave:/var/lib/mysql \
  -v /data/master-slave/conf/slave.conf:/etc/my.cnf \
  mysql:8.0
  • 方法二:docker-compose
# Compose file for the MySQL master/slave pair.
# Each service needs its own key: with two identical `mysql` keys the parser
# keeps only the last one, so the master container was never created.
version: '3'
services:
  # Master instance, published on host port 3001.
  mysql-master:
    network_mode: "bridge"
    environment:
      MYSQL_ROOT_PASSWORD: "123456"
    image: "mysql:8.0"
    container_name: mysql-master
    restart: always
    volumes:
      - "/data/master-slave/data/mysql-master:/var/lib/mysql"
      - "/data/master-slave/conf/master.conf:/etc/my.cnf"
    ports:
      - "3001:3306"
  # Slave instance, published on host port 3002.
  mysql-slave:
    network_mode: "bridge"
    environment:
      MYSQL_ROOT_PASSWORD: "123456"
    image: "mysql:8.0"
    container_name: mysql-slave
    restart: always
    volumes:
      - "/data/master-slave/data/mysql-slave:/var/lib/mysql"
      - "/data/master-slave/conf/slave.conf:/etc/my.cnf"
    ports:
      - "3002:3306"
  • mysql.yml
# Prometheus alerting rules for the MySQL exporter metrics.
groups:
- name: MySQLStatsAlert
  rules:
  # mysql_up has been 0 for one minute.
  - alert: MySQL是关闭的
    expr: mysql_up == 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: '实例 {{ $labels.instance }} MySQL是关闭的'
      description: 'MySQL数据库关闭。这需要立即采取行动!'
  # Open InnoDB file count is above 75% of open_files_limit.
  - alert: 打开文件数
    expr: mysql_global_status_innodb_num_open_files > (mysql_global_variables_open_files_limit) * 0.75
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: '实例 {{ $labels.instance }} 打开文件数过高'
      description: '打开文件数过高。请考虑增加open_files_limit.'
  # read_buffer_size is larger than slave_max_allowed_packet.
  - alert: 读取缓冲区的大小大于最大值,允许数据包大小
    expr: mysql_global_variables_read_buffer_size > mysql_global_variables_slave_max_allowed_packet
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: '实例 {{ $labels.instance }} 读缓冲区大小(read_buffer_size)大于max。允许的数据包大小(max_allowed_packet)'
      description: '读缓冲区大小(read_buffer_size)大于max。允许的数据包大小(max_allowed_packet)。这可能会破坏复制.'
  # Fires when sort_buffer_size is outside the 256 KiB – 4 MiB range that the
  # description recommends. The expression previously tested
  # innodb_sort_buffer_size and read_buffer_size, which are different
  # variables from the one the alert text talks about.
  - alert: 排序缓冲区可能错过配置
    expr: mysql_global_variables_sort_buffer_size < 256*1024 or mysql_global_variables_sort_buffer_size > 4*1024*1024
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "实例 {{ $labels.instance }} 排序缓冲区大小不是太大就是太小"
      description: "排序缓冲区大小不是太大就是太小。sort_buffer_size的一个较好的值是256k到4M之间."
  # thread_stack is below 196608 bytes (192 KiB).
  - alert: 线程堆栈大小太小
    expr: mysql_global_variables_thread_stack <196608
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: '实例 {{ $labels.instance }} 线程堆栈大小太小'
      description: '线程堆栈大小太小。例如,当您使用存储语言构造时,这可能会导致问题。thread_stack_size的典型值是256k.'
  # Max_used_connections is above 80% of max_connections.
  - alert: 使用了超过80%的最大连接限制
    expr: mysql_global_status_max_used_connections > mysql_global_variables_max_connections * 0.8
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: '实例 {{ $labels.instance }} 使用了超过80%的最大连接限制'
      description: '使用了超过80%的最大连接限制'
  # innodb_force_recovery is set to a non-zero value.
  - alert: InnoDB 已启用强制恢复
    expr: mysql_global_variables_innodb_force_recovery != 0
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: '实例 {{ $labels.instance }} 已启用InnoDB 强制恢复'
      description: '已启用InnoDB 强制恢复,这种模式只能用于数据恢复目的,它禁止写入数据.'
  # innodb_log_file_size is below 16777216 bytes (16 MiB).
  # The summary previously read "nnoDB" (missing leading "I").
  - alert: InnoDB日志文件太小
    expr: mysql_global_variables_innodb_log_file_size < 16777216
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "实例 {{ $labels.instance }} InnoDB日志文件大小可能太小"
      description: "InnoDB日志文件大小可能太小,选择较小的InnoDB日志文件大小会对性能产生重大影响."
  # innodb_flush_log_at_trx_commit is set to something other than 1.
  - alert: InnoDB事务提交时的Flush日志
    expr: mysql_global_variables_innodb_flush_log_at_trx_commit != 1
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: '实例 {{ $labels.instance }} InnoDB事务提交时的Flush日志设置为!= 1'
      description: 'InnoDB事务提交时的Flush日志设置为!= 1。这可能导致在电源故障时丢失已提交的事务.'
  # Open table definitions exceed table_definition_cache.
  - alert: 表定义缓存太小
    expr: mysql_global_status_open_table_definitions > mysql_global_variables_table_definition_cache
    for: 1m
    labels:
      severity: page
    annotations:
      summary: '实例 {{ $labels.instance }} 您的表定义缓存可能太小'
      description: '您的表定义缓存可能太小。如果它太小,则会对性能产生重大影响!'
  # Open tables exceed 99% of table_open_cache.
  - alert: 表打开缓存太小
    expr: mysql_global_status_open_tables >mysql_global_variables_table_open_cache * 99/100
    for: 1m
    labels:
      severity: page
    annotations:
      summary: '实例 {{ $labels.instance }} 您的表打开缓存可能太小(旧名称表缓存)'
      description: '您的表打开缓存可能太小(旧名称表缓存)。如果它太小,则会对性能产生重大影响!'
  # thread_stack is below 262144 bytes (256 KiB).
  - alert: 线程堆栈大小可能太小
    expr: mysql_global_variables_thread_stack < 262144
    for: 1m
    labels:
      severity: page
    annotations:
      summary: '实例 {{ $labels.instance }} 线程堆栈大小可能太小'
      description: '线程堆栈大小可能太小,例如,当您使用存储语言构造时,这可能会导致问题,thread_stack_size的典型值是256k.'
  # innodb_buffer_pool_instances equals 1.
  - alert: InnoDB缓冲池实例太小
    expr: mysql_global_variables_innodb_buffer_pool_instances == 1
    for: 1m
    labels:
      severity: page
    annotations:
      summary: '实例 {{ $labels.instance }} InnoDB缓冲池实例太小'
      description: '如果你使用的是MySQL 5.5或更高版本,你应该使用几个InnoDB缓冲池实例来提高性能。InnoDB缓冲池实例的大小至少为1G,InnoDB缓冲池实例,你可以设置等于你的机器的核数.'
  # ignore_builtin_innodb equals 1.
  - alert: InnoDB 插件已启用
    expr: mysql_global_variables_ignore_builtin_innodb == 1
    for: 1m
    labels:
      severity: page
    annotations:
      summary: '实例 {{ $labels.instance }} InnoDB 插件已启用'
      description: 'InnoDB 插件已启用'
  # log_bin is not 1.
  - alert: 禁用二进制日志
    expr: mysql_global_variables_log_bin != 1
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: '实例 {{ $labels.instance }} 禁用二进制日志'
      description: '二进制日志被禁用。这就禁止您进行时间点恢复(PiTR)。'
  # binlog_cache_size is below 1048576 bytes (1 MiB).
  - alert: Binlog缓存大小太小
    expr: mysql_global_variables_binlog_cache_size < 1048576
    for: 1m
    labels:
      severity: page
    annotations:
      summary: '实例 {{ $labels.instance }} Binlog缓存大小太小'
      description: 'Binlog缓存大小可能太小了。1M或更高的值是可以的.'
  # binlog_stmt_cache_size is non-zero but below 1048576 bytes (1 MiB).
  - alert: Binlog语句缓存大小太小
    expr: mysql_global_variables_binlog_stmt_cache_size <1048576 and mysql_global_variables_binlog_stmt_cache_size > 0
    for: 1m
    labels:
      severity: page
    annotations:
      summary: '实例 {{ $labels.instance }} Binlog语句缓存大小太小'
      description: 'Binlog语句缓存大小可能太小了。1M或更高的值通常是可以的.'
  # sync_binlog equals 1.
  - alert: Binlog同步已启用
    expr: mysql_global_variables_sync_binlog == 1
    for: 1m
    labels:
      severity: page
    annotations:
      summary: '实例 {{ $labels.instance }} Binlog同步已启用'
      description: 'Binlog同步已启用。这带来了更高的数据安全性,但以写入性能为代价.'
  # The slave IO thread metric is not 1.
  - alert: IO线程停止
    expr: mysql_slave_status_slave_io_running != 1
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: '实例 {{ $labels.instance }} IO线程停止'
      description: 'IO线程已停止。这通常是因为它无法连接到Master.'
  # The slave SQL thread metric is 0.
  - alert: SQL线程停止
    expr: mysql_slave_status_slave_sql_running == 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: '实例 {{ $labels.instance }} SQL线程停止'
      description: 'SQL线程已停止。这通常是因为它不能应用从主程序接收到的SQL语句.'
  # Fires when the slave is more than 30 seconds behind the master.
  # seconds_behind_master is a gauge, so it is compared directly; wrapping it
  # in rate() measured how fast the lag changes, which meant a large but
  # constant lag never fired.
  - alert: Slave 落后于 Master
    expr: mysql_slave_status_seconds_behind_master > 30
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "实例 {{ $labels.instance }} Slave 落后于 Master"
      description: "Slave 落后于 Master. 请检查从线程是否正在运行,以及是否有一些性能问题!"
  # Fires when a slave has read_only turned off. The previous condition
  # (`!= 0`) was inverted: it fired on slaves that WERE read-only.
  # `and on (instance)` keeps only instances that also report slave status.
  # NOTE(review): this assumes the master exposes no mysql_slave_status_*
  # series, so it is not matched — confirm against your exporter output.
  - alert: Slave不是只读的
    expr: mysql_global_variables_read_only == 0 and on (instance) mysql_slave_status_slave_io_running
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "实例 {{ $labels.instance }} Slave不是只读的"
      description: "Slave没有设置为只读。你可能会不小心操纵从服务器上的数据,得到不一致的结果……"

  • prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            # The target list was empty, so fired alerts were never sent
            # anywhere. The alertmanager container is published on host
            # port 3007 in the compose file of this document.
            - '192.168.10.88:3007'
# Alerting rule files, as mounted into the Prometheus container.
rule_files:
  - /etc/prometheus/mysql.yml
scrape_configs:
  # The job name is added as a label `job=` to any timeseries scraped from this config.
  - job_name: 'mysql_monitor'
    static_configs:
      # - targets: ['mysqld-exporter-master:9104','mysqld-exporter-slave:9104']
      # Host ports 3003/3004 are the published ports of the two exporters.
      - targets: ['192.168.10.88:3003', '192.168.10.88:3004']
  • alertmanager.yml
# Routing: every alert goes to the single receiver, grouped by alert name.
route:
  group_by:
    - alertname
  group_wait: 1s
  group_interval: 1m
  repeat_interval: 4h
  receiver: webhook1
# Receiver: forwards notifications to the DingTalk webhook bridge.
receivers:
  - name: webhook1
    webhook_configs:
      - send_resolved: true
        url: 'http://192.168.10.88:3008/dingtalk/webhook1/send'
# A critical alert mutes warning alerts that share the listed labels.
inhibit_rules:
  - source_match:
      severity: critical
    target_match:
      severity: warning
    equal:
      - alertname
      - dev
      - instance
  • prometheus-webhook-dingtalk.yml
## Request timeout
# timeout: 5s

# NOTE(review): the access token and signing secret below are written in
# plain text. Replace them with your own robot's values and keep the real
# ones out of shared or published copies of this file.

## Customizable templates path
templates:
  - /etc/prometheus-webhook-dingtalk/template/default.tmpl
targets:
  # Target referenced by the Alertmanager webhook URL (.../dingtalk/webhook1/send).
  webhook1:
    url: 'https://oapi.dingtalk.com/robot/send?access_token=2d95a449e55205b7ca8b71ec00deae98556768dd5f9fc78f19485e1c850cc34b'
    # secret for signature
    secret: 'SECc4148b709fb3db931d31c7a55457d9797c302a3d8257586e935cf4003440d6c0'
  webhook2:
    url: 'https://oapi.dingtalk.com/robot/send?access_token=2d95a449e55205b7ca8b71ec00deae98556768dd5f9fc78f19485e1c850cc34b'
  webhook_legacy:
    url: 'https://oapi.dingtalk.com/robot/send?access_token=2d95a449e55205b7ca8b71ec00deae98556768dd5f9fc78f19485e1c850cc34b'
    # Customize template content
    message:
      # Use legacy template
      title: '{{ template "legacy.title" . }}'
      text: '{{ template "legacy.content" . }}'
  webhook_mention_all:
    url: 'https://oapi.dingtalk.com/robot/send?access_token=2d95a449e55205b7ca8b71ec00deae98556768dd5f9fc78f19485e1c850cc34b'
    mention:
      all: true
  webhook_mention_users:
    url: 'https://oapi.dingtalk.com/robot/send?access_token=2d95a449e55205b7ca8b71ec00deae98556768dd5f9fc78f19485e1c850cc34b'
    mention:
      mobiles:
        - '156xxxx8827'
        - '189xxxx8325'
  • default.tmpl
{{/* Shared helpers. "__subject" renders the title: upper-cased status, the firing count while firing, the group label values, and any extra common labels in parentheses. "__alertmanagerURL" is the fixed link target used by the content templates. */}}
{{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }}
{{ define "__alertmanagerURL" }}http://192.168.10.88:3005/alerts {{ end }}

{{/* Generic alert list: for each alert, prints its sorted labels and annotations as quoted bullet lines, followed by a link to the generator URL. Used by "legacy.content". */}}
{{ define "__text_alert_list" }}{{ range . }}
**Labels**
{{ range .Labels.SortedPairs }}> - {{ .Name }}: {{ .Value | markdown | html }}
{{ end }}
**Annotations**
{{ range .Annotations.SortedPairs }}> - {{ .Name }}: {{ .Value | markdown | html }}
{{ end }}
**Source:** [{{ .GeneratorURL }}]({{ .GeneratorURL }})
{{ end }}{{ end }}

{{/* Firing alert list: for each alert, prints status, severity, alert name, summary, instance, description and the start time formatted in the Asia/Shanghai zone. */}}
{{ define "default.__text_alert_list" }}{{ range . }}
---
【告警状态】:{{ .Status }}
【告警级别】:{{ .Labels.severity }}
【告警类型】:{{ .Labels.alertname }}
【告警应用】:{{ .Annotations.summary }}
【告警主机】:{{ .Labels.instance }}
【告警详情】:{{ .Annotations.description }}
【告警时间】:{{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }}
{{ end }}
{{ end }}

{{/* Resolved alert list: same fields as the firing list, plus both the start time and the end time, formatted in the Asia/Shanghai zone. */}}
{{ define "default.__text_alertresolve_list" }}{{ range . }}
---
【告警状态】: {{ .Status }}
【告警级别】: {{ .Labels.severity }}
【告警类型】: {{ .Labels.alertname }}
【告警应用】: {{ .Annotations.summary }}
【告警主机】: {{ .Labels.instance }}
【告警详情】: {{ .Annotations.description }}
【触发时间】: {{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }}
【结束时间】: {{ dateInZone "2006.01.02 15:04:05" (.EndsAt) "Asia/Shanghai" }}
{{ end }}
{{ end }}

{{/* Default templates: "default.title" reuses "__subject"; "default.content" prints a linked heading, then the firing section and the resolved section, each only when it has at least one alert. */}}
{{ define "default.title" }}{{ template "__subject" . }}{{ end }}
{{ define "default.content" }}#### \[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}\] **[{{ index .GroupLabels "alertname" }}]({{ template "__alertmanagerURL" . }})**

{{ if gt (len .Alerts.Firing) 0 -}} 

**=====好家伙,来活了~~~=====**
{{ template "default.__text_alert_list" .Alerts.Firing }} 
---------------------------------------------------------

{{- end }}

{{ if gt (len .Alerts.Resolved) 0 -}}

**=====舒服了~~~   =====**
{{ template "default.__text_alertresolve_list" .Alerts.Resolved }}

{{- end }}
{{- end }}

{{/* Legacy templates: "legacy.title" reuses "__subject"; "legacy.content" prints the same linked heading followed by the generic list of firing alerts only. */}}
{{ define "legacy.title" }}{{ template "__subject" . }}{{ end }}
{{ define "legacy.content" }}#### \[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}\] **[{{ index .GroupLabels "alertname" }}]({{ template "__alertmanagerURL" . }})**
{{ template "__text_alert_list" .Alerts.Firing }}
{{- end }}

{{/* Compatibility aliases: the "ding.link.*" names simply delegate to the "default.*" templates. */}}
{{ define "ding.link.title" }}{{ template "default.title" . }}{{ end }}
{{ define "ding.link.content" }}{{ template "default.content" . }}{{ end }}

mysqld_exporter

-- Account used by mysqld_exporter, limited to 3 concurrent connections.
CREATE USER 'mysql_monitor'@'%'
  IDENTIFIED BY 'mysql_monitor'
  WITH MAX_USER_CONNECTIONS 3;
-- Privileges granted to the monitoring account on all databases.
GRANT PROCESS, REPLICATION CLIENT, SELECT
  ON *.* TO 'mysql_monitor'@'%';
COMMIT;
FLUSH PRIVILEGES;
# Run a single exporter on host port 3003 against the MySQL instance on 3002.
# The image is pinned to v0.14.0: mysqld_exporter v0.15.0 and later no longer
# read the DATA_SOURCE_NAME variable, so the untagged (latest) image cannot
# connect with this command.
docker run -d \
  --name mysqld_exporter \
  -p 3003:9104 \
  -e DATA_SOURCE_NAME="mysql_monitor:mysql_monitor@(192.168.10.88:3002)/" \
  prom/mysqld-exporter:v0.14.0
# Compose file for the monitoring stack.
version: '3'
services:
  # Exporter for the master (MySQL on host port 3001), published on 3003.
  mysqld-exporter-master:
    network_mode: "bridge"
    environment:
      DATA_SOURCE_NAME: "mysql_monitor:mysql_monitor@(192.168.10.88:3001)/"
    # Pinned: mysqld_exporter v0.15.0+ no longer reads DATA_SOURCE_NAME, so
    # the untagged (latest) image cannot connect with this configuration.
    image: "prom/mysqld-exporter:v0.14.0"
    container_name: mysqld-exporter-master
    restart: always
    ports:
      - "3003:9104"
  # Exporter for the slave (MySQL on host port 3002), published on 3004.
  mysqld-exporter-slave:
    depends_on:
      - mysqld-exporter-master
    network_mode: "bridge"
    environment:
      DATA_SOURCE_NAME: "mysql_monitor:mysql_monitor@(192.168.10.88:3002)/"
    # Pinned: mysqld_exporter v0.15.0+ no longer reads DATA_SOURCE_NAME, so
    # the untagged (latest) image cannot connect with this configuration.
    image: "prom/mysqld-exporter:v0.14.0"
    container_name: mysqld-exporter-slave
    restart: always
    ports:
      - "3004:9104"
  # Prometheus server, published on host port 3005; the rule file and main
  # config are bind-mounted from the conf directory.
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    restart: always
    network_mode: bridge
    depends_on:
      - mysqld-exporter-master
      - mysqld-exporter-slave
    environment:
      TZ: Asia/Shanghai
    ports:
      - '3005:9090'
    volumes:
      - '/data/master-slave/conf/mysql.yml:/etc/prometheus/mysql.yml'
      - '/data/master-slave/conf/prometheus.yml:/etc/prometheus/prometheus.yml'
      - '/etc/localtime:/etc/localtime:ro'
  # Alertmanager, published on host port 3007.
  alertmanager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    restart: always
    network_mode: bridge
    depends_on:
      - prometheus
    ports:
      - '3007:9093'
    volumes:
      - '/data/master-slave/conf/alertmanager.yml:/etc/alertmanager/alertmanager.yml'
  # DingTalk webhook bridge, reached by Alertmanager on host port 3008.
  prometheus-webhook-dingtalk:
    depends_on:
      - prometheus
    network_mode: "bridge"
    image: "timonwong/prometheus-webhook-dingtalk:latest"
    container_name: prometheus-webhook-dingtalk
    restart: always
    volumes:
      - "/data/master-slave/conf/prometheus-webhook-dingtalk.yml:/etc/prometheus-webhook-dingtalk/config.yml"
      - "/data/master-slave/template:/etc/prometheus-webhook-dingtalk/template"
    ports:
      # prometheus-webhook-dingtalk listens on 8060 by default; 9093 is
      # Alertmanager's port, so the old "3008:9093" mapping reached nothing.
      - "3008:8060"
  # Grafana UI, published on host port 3006.
  grafana:
    image: 'grafana/grafana:latest'
    container_name: grafana
    restart: always
    network_mode: bridge
    depends_on:
      - prometheus
    ports:
      - '3006:3000'

你可能感兴趣的:(mysql主从+prometheus+grafana)