In recent years continuous integration and microservices have kept expanding their reach, and Docker-based microservice architecture has gradually become the mainstream server architecture. But microservices and CI together produce hundreds or even thousands of service components, and managing the lifecycle of each component's containers has become a prominent chore. Fine-grained visibility into every container's lifecycle, memory usage, CPU, load, and other metrics is urgently needed if each service's resource consumption is to be accounted for and overall utilization improved. Traditional open-source monitoring systems such as Nagios and Zabbix do support custom checks, but getting them to monitor every container at this granularity would be painful. Prometheus, meanwhile, arrived as if made for exactly this job, and its strengths are clear:
- Dynamic service discovery of monitoring targets, with support for Consul service registration, DNS, Kubernetes, file-based discovery, and more.
- An independent Prometheus Server can run per data center or per team. Prometheus federation lets multiple instances form one logical cluster; when a single Prometheus Server can no longer handle the load, it can be scaled out with functional partitioning (sharding) plus federation.
- Client SDKs for many languages: Java, JMX, Python, Go, Ruby, .NET, Node.js, and so on, plus a large body of third-party exporters maintained by the community.
Prometheus Server is the core of the Prometheus stack, responsible for collecting, storing, and querying monitoring data. It can manage monitoring targets through static configuration or discover them dynamically via Service Discovery, and it pulls metrics from those targets. It also stores what it collects: Prometheus Server is itself a time-series database and persists scraped samples to local disk as time series. Finally, it exposes its own query language, PromQL, for querying and analyzing the data.
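PromQL can be used from the built-in web UI or over the HTTP API. As a quick illustration (the server address is simply the one used throughout this article):

```bash
# Instant PromQL query over the HTTP API: ingestion rate of Prometheus itself
curl 'http://192.168.16.148:9090/api/v1/query' \
  --data-urlencode 'query=rate(prometheus_tsdb_head_samples_appended_total[5m])'
```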
An Exporter exposes a metrics endpoint to Prometheus Server as an HTTP service; Prometheus Server scrapes that endpoint to collect the metrics it needs.
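An exporter endpoint is plain HTTP and can be inspected by hand; for example, against a node_exporter on its default port (the address is illustrative):

```bash
# Fetch raw metrics exactly as Prometheus would scrape them
curl http://192.168.16.148:9100/metrics
```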
Prometheus Server supports alerting rules written in PromQL: when a rule's expression holds, an alert fires, and everything after that is handled by AlertManager. AlertManager integrates with built-in notification channels such as email and Slack, and custom handling can be wired up through webhooks; it is the alert-processing hub of the Prometheus ecosystem.
Because Prometheus collects data with a pull model, the network must allow Prometheus Server to reach every Exporter directly. When that is not possible, PushGateway can act as a relay: jobs on the isolated network push their metrics to the gateway, and Prometheus Server pulls from the PushGateway in the usual way.
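A minimal sketch, assuming a PushGateway already running on its default port 9091 on the monitoring host: a batch job pushes one sample, which Prometheus later scrapes from the gateway.

```bash
# Push a single sample for job "backup_job" to the PushGateway
echo "backup_last_success_timestamp $(date +%s)" | \
  curl --data-binary @- http://192.168.16.148:9091/metrics/job/backup_job
```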
Prometheus supports both local and remote storage. It ships with a time-series database built on local storage, which keeps operation and maintenance simple and covers most monitoring workloads. Local storage also means, however, that Prometheus is not suited to retaining large amounts of history (the usual recommendation is to keep only a few weeks or months of data), and it rules out elastic scaling.
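As an illustration (flag name as of Prometheus 2.8+; the 15-day value is an assumption, not part of the original setup), the local retention window is set with a startup flag:

```bash
# Cap local TSDB retention at 15 days; older blocks are deleted by compaction
prometheus --config.file=prometheus.yml --storage.tsdb.retention.time=15d
```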
To stay simple, Prometheus does not try to solve these problems itself. Instead it defines two standard interfaces, remote_write and remote_read, through which data can be written to and read back from any third-party storage service; Prometheus calls this mechanism Remote Storage.
| Storage service | Supported modes |
| --- | --- |
| AppOptics | write |
| Chronix | write |
| Cortex | read/write |
| CrateDB | read/write |
| Gnocchi | write |
| Graphite | write |
| InfluxDB | read/write |
| OpenTSDB | write |
| Elasticsearch | write |
| Wavefront | write |
| Kafka | write |
| PostgreSQL/TimescaleDB | read/write |
| SignalFx | write |
There are many more; see the official documentation for the full list. This article uses InfluxDB to store Prometheus data, since it supports both remote read and remote write.
First lay down the InfluxDB (1.x) configuration:

```bash
cat << 'EOF' > /data/server/influxd/influxdb.conf
reporting-disabled = true
bind-address = "0.0.0.0:8088"
[meta]
dir = "/var/lib/influxdb/meta"
retention-autocreate = true
logging-enabled = true
[data]
dir = "/var/lib/influxdb/data"
index-version = "inmem"
wal-dir = "/var/lib/influxdb/wal"
wal-fsync-delay = "0s"
validate-keys = false
query-log-enabled = true
cache-max-memory-size = 1073741824
cache-snapshot-memory-size = 26214400
cache-snapshot-write-cold-duration = "10m0s"
compact-full-write-cold-duration = "4h0m0s"
compact-throughput = 50331648
compact-throughput-burst = 50331648
max-series-per-database = 1000000
max-values-per-tag = 100000
max-concurrent-compactions = 0
max-index-log-file-size = 1048576
series-id-set-cache-size = 100
trace-logging-enabled = false
tsm-use-madv-willneed = false
[coordinator]
write-timeout = "10s"
max-concurrent-queries = 0
query-timeout = "0s"
log-queries-after = "0s"
max-select-point = 0
max-select-series = 0
max-select-buckets = 0
[retention]
enabled = true
check-interval = "30m0s"
[shard-precreation]
enabled = true
check-interval = "10m0s"
advance-period = "30m0s"
[monitor]
store-enabled = true
store-database = "_internal"
store-interval = "10s"
[subscriber]
enabled = true
http-timeout = "30s"
insecure-skip-verify = false
ca-certs = ""
write-concurrency = 40
write-buffer-size = 1000
[http]
enabled = true
bind-address = ":8086"
auth-enabled = false
log-enabled = true
suppress-write-log = false
write-tracing = false
flux-enabled = false
flux-log-enabled = false
pprof-enabled = true
debug-pprof-enabled = false
https-enabled = false
https-certificate = "/etc/ssl/influxdb.pem"
https-private-key = ""
max-row-limit = 0
max-connection-limit = 0
shared-secret = ""
realm = "InfluxDB"
unix-socket-enabled = false
unix-socket-permissions = "0777"
bind-socket = "/var/run/influxdb.sock"
max-body-size = 25000000
access-log-path = ""
max-concurrent-write-limit = 0
max-enqueued-write-limit = 0
enqueued-write-timeout = 30000000000
[logging]
format = "auto"
level = "info"
suppress-logo = false
[[graphite]]
enabled = false
bind-address = ":2003"
database = "graphite"
retention-policy = ""
protocol = "tcp"
batch-size = 5000
batch-pending = 10
batch-timeout = "1s"
consistency-level = "one"
separator = "."
udp-read-buffer = 0
[[collectd]]
enabled = false
bind-address = ":25826"
database = "collectd"
retention-policy = ""
batch-size = 5000
batch-pending = 10
batch-timeout = "10s"
read-buffer = 0
typesdb = "/usr/share/collectd/types.db"
security-level = "none"
auth-file = "/etc/collectd/auth_file"
parse-multivalue-plugin = "split"
[[opentsdb]]
enabled = false
bind-address = ":4242"
database = "opentsdb"
retention-policy = ""
consistency-level = "one"
tls-enabled = false
certificate = "/etc/ssl/influxdb.pem"
batch-size = 1000
batch-pending = 5
batch-timeout = "1s"
log-point-errors = true
[[udp]]
enabled = false
bind-address = ":8089"
database = "udp"
retention-policy = ""
batch-size = 5000
batch-pending = 10
read-buffer = 0
batch-timeout = "1s"
precision = ""
[continuous_queries]
log-enabled = true
enabled = true
query-stats-enabled = false
run-interval = "1s"
[tls]
min-version = ""
max-version = ""
EOF
```
Start InfluxDB with the configuration and all three state directories mounted from the host:

```bash
# meta, data and wal (as set in influxdb.conf) must all be mounted so state survives container recreation
docker run --net=host -d --restart=always --name influxdb \
  -e TZ="Asia/Shanghai" \
  -v /data/server/influxd/influxdb.conf:/etc/influxdb/influxdb.conf:ro \
  -v /data/server/influxd/meta:/var/lib/influxdb/meta \
  -v /data/server/influxd/data:/var/lib/influxdb/data \
  -v /data/server/influxd/wal:/var/lib/influxdb/wal \
  influxdb -config /etc/influxdb/influxdb.conf
```
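The remote read/write endpoints used later expect the target database to exist, so create it once (InfluxDB 1.x query API, address as configured above):

```bash
# Create the database that Prometheus will remote-write into
curl -XPOST 'http://192.168.16.148:8086/query' \
  --data-urlencode 'q=CREATE DATABASE prometheus'
```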
Next, run Consul as the service-discovery backend:

```bash
docker run -d --net=host \
  -v /data/server/consul:/consul/data \
  -e CONSUL_BIND_INTERFACE='eth0' \
  --name=consul consul agent -server \
  -bootstrap -ui -client='0.0.0.0'
```
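A quick check that the agent is up (the catalog is also browsable in the UI on port 8500):

```bash
# Services currently registered with the local Consul agent
curl http://192.168.16.148:8500/v1/agent/services
```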
With storage and service discovery in place, create the Prometheus configuration:

```bash
cat << 'EOF' > /data/server/prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s     # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 192.168.16.148:9093

# Ship samples to InfluxDB and read them back through the remote storage API
remote_write:
  - url: "http://192.168.16.148:8086/api/v1/prom/write?db=prometheus"

remote_read:
  - url: "http://192.168.16.148:8086/api/v1/prom/read?db=prometheus"

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - /etc/prometheus/rules/*.rules
  # - "first_rules.yml"
  # - "second_rules.yml"

# Scrape configurations. Every job below discovers its targets through Consul
# and keeps only the services whose tags match.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  # metrics_path defaults to '/metrics' and scheme defaults to 'http'.
  - job_name: 'consul-kong-exporter'
    consul_sd_configs:
      - server: '192.168.16.148:8500'
        services: []
    relabel_configs:
      - source_labels: [__meta_consul_tags]
        regex: .*kong.*
        action: keep
      - regex: __meta_consul_service_metadata_(.+)
        action: labelmap
  - job_name: 'consul-elasticsearch-exporter'
    metrics_path: /_prometheus/metrics
    consul_sd_configs:
      - server: '192.168.16.148:8500'
        services: []
    relabel_configs:
      - source_labels: [__meta_consul_tags]
        regex: .*elasticsearch.*
        action: keep
      - regex: __meta_consul_service_metadata_(.+)
        action: labelmap
  - job_name: 'cadvisor'
    consul_sd_configs:
      - server: '192.168.16.148:8500'
        services: []
    relabel_configs:
      - source_labels: [__meta_consul_tags]
        regex: .*cadvisor.*
        action: keep
      - regex: __meta_consul_service_metadata_(.+)
        action: labelmap
  - job_name: 'node-exporter'
    consul_sd_configs:
      - server: '192.168.16.148:8500'
        services: []
    relabel_configs:
      - source_labels: [__meta_consul_tags]
        regex: .*node-exporter.*
        action: keep
      - regex: __meta_consul_service_metadata_(.+)
        action: labelmap
  - job_name: 'rocketmq'
    consul_sd_configs:
      - server: '192.168.16.148:8500'
        services: []
    relabel_configs:
      - source_labels: [__meta_consul_tags]
        regex: .*rocketmq.*
        action: keep
      - regex: __meta_consul_service_metadata_(.+)
        action: labelmap
  - job_name: 'kafka'
    consul_sd_configs:
      - server: '192.168.16.148:8500'
        services: []
    relabel_configs:
      - source_labels: [__meta_consul_tags]
        regex: .*kafka.*
        action: keep
      - regex: __meta_consul_service_metadata_(.+)
        action: labelmap
  - job_name: 'zookeeper'
    consul_sd_configs:
      - server: '192.168.16.148:8500'
        services: []
    relabel_configs:
      - source_labels: [__meta_consul_tags]
        regex: .*zookeeper.*
        action: keep
      - regex: __meta_consul_service_metadata_(.+)
        action: labelmap
  - job_name: 'mongodb'
    consul_sd_configs:
      - server: '192.168.16.148:8500'
        services: []
    relabel_configs:
      - source_labels: [__meta_consul_tags]
        regex: .*mongodb.*
        action: keep
      - regex: __meta_consul_service_metadata_(.+)
        action: labelmap
  - job_name: 'mysql'
    consul_sd_configs:
      - server: '192.168.16.148:8500'
        services: []
    relabel_configs:
      - source_labels: [__meta_consul_tags]
        regex: .*mysql.*
        action: keep
      - regex: __meta_consul_service_metadata_(.+)
        action: labelmap
EOF
```
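Before starting the server, the file can be validated with promtool, which ships in the same image:

```bash
# Validate prometheus.yml syntax; exits non-zero on error
docker run --rm \
  -v /data/server/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml \
  --entrypoint promtool prom/prometheus check config /etc/prometheus/prometheus.yml
```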
Start Prometheus Server:

```bash
docker run -d --net=host --name prometheus --restart=always \
  --log-driver json-file --log-opt max-size=10m --log-opt max-file=7 \
  -v /data/server/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml \
  -v /data/server/prometheus/rules:/etc/prometheus/rules prom/prometheus \
  --config.file=/etc/prometheus/prometheus.yml \
  --storage.tsdb.path=/prometheus \
  --web.console.libraries=/usr/share/prometheus/console_libraries \
  --web.console.templates=/usr/share/prometheus/consoles \
  --web.enable-lifecycle
```
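The rule_files glob above points at /etc/prometheus/rules/*.rules, so at least one rule file is needed. A minimal sketch (the metrics come from cAdvisor; the 90% threshold and the team label, chosen to match the Alertmanager routes shown later, are assumptions):

```bash
cat << 'EOF' > /data/server/prometheus/rules/container.rules
groups:
- name: container-alerts
  rules:
  - alert: ContainerHighMemory
    # The "> 0" guard skips containers that have no memory limit set
    expr: container_memory_usage_bytes{name!=""} / (container_spec_memory_limit_bytes{name!=""} > 0) > 0.9
    for: 5m
    labels:
      team: yunwei
    annotations:
      description: "Container {{ $labels.name }} on {{ $labels.instance }} uses over 90% of its memory limit"
EOF

# --web.enable-lifecycle lets Prometheus reload its config and rules on demand
curl -X POST http://192.168.16.148:9090/-/reload
```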
Deploy cAdvisor on each Docker host to expose per-container CPU, memory, and I/O metrics:

```bash
docker run -d --net=host --restart=always \
  -e TZ="Asia/Shanghai" \
  --log-driver json-file --log-opt max-size=10m --log-opt max-file=7 \
  --volume=/:/rootfs:ro \
  --volume=/var/run:/var/run:ro \
  --volume=/sys:/sys:ro \
  --volume=/var/lib/docker/:/var/lib/docker:ro \
  --volume=/dev/disk:/dev/disk:ro \
  --name=cadvisor \
  registry.cn-hangzhou.aliyuncs.com/zhijing/cadvisor:latest
```
Register the cAdvisor instance with Consul; the "cadvisor" tag is what the relabel rule in prometheus.yml keeps:

```bash
cat << 'EOF' > cadvisor.json
{
  "ID": "yunwei-192.168.16.148",
  "Name": "yunwei",
  "Tags": [
    "cadvisor"
  ],
  "Address": "192.168.16.148",
  "Port": 8080,
  "Meta": {
    "service": "cadvisor",
    "team": "appgroup",
    "project": "yunwei"
  },
  "EnableTagOverride": false,
  "Check": {
    "HTTP": "http://192.168.16.148:8080/metrics",
    "Interval": "10s"
  },
  "Weights": {
    "Passing": 10,
    "Warning": 1
  }
}
EOF
curl --request PUT --data @cadvisor.json \
  'http://192.168.16.148:8500/v1/agent/service/register?replace-existing-checks=1'
```
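Within one scrape interval the new target should show up in Prometheus; this can be confirmed on the /targets page or from the command line:

```bash
# Ask Prometheus which targets it is scraping and how healthy they are
curl -s http://192.168.16.148:9090/api/v1/targets
```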
Now configure Alertmanager:

```bash
cat << 'EOF' > /data/server/alertmanager/alertmanager.yml
global:
  smtp_smarthost: 'smtp.alongparty.cn:25'
  smtp_from: '[email protected]'
  smtp_auth_username: '[email protected]'
  smtp_auth_password: 'tRpD2fpzE6zp2XQq'
  smtp_require_tls: false
templates:
  - '/etc/alertmanager/templates/*.tmpl'
route:
  receiver: 'webhook'
  # How long to buffer alerts for a new group before sending the first notification
  group_wait: 30s
  # How long to wait before notifying about new alerts added to an existing group
  group_interval: 10m
  # How long to wait before repeating a notification that was already sent
  repeat_interval: 30m
  # Group alerts by alertname
  group_by: [alertname]
  # Alerts that match none of the child routes below stay at the root node
  # and are dispatched to 'webhook'.
  routes:
    - receiver: 'default-receiver'
      group_wait: 10s
      match_re:
        team: yunwei
    - receiver: 'bigdata'
      group_wait: 10s
      match_re:
        team: bigdata
    - receiver: 'mes'
      group_wait: 10s
      match_re:
        team: mes
    - receiver: 'baibu-dev'
      group_wait: 10s
      match_re:
        team: baibu-dev
    - receiver: 'zz-mes'
      group_wait: 10s
      match_re:
        team: zz-mes
receivers:
  - name: 'default-receiver'
    email_configs:
      - to: '[email protected]'
        html: '{{ template "cadvisor.html" . }}'
        headers: { Subject: "[{{ .CommonLabels.team }}] {{ .CommonAnnotations.description }}" }
  - name: 'webhook'
    webhook_configs:
      - send_resolved: true
        http_config: {}
        url: http://10.237.3.50:5000/send
  - name: 'baibu-dev'
    email_configs:
      - to: '[email protected],[email protected]'
        html: '{{ template "cadvisor.html" . }}'
        headers: { Subject: "[{{ .CommonLabels.team }}] {{ .CommonAnnotations.description }}" }
  # NOTE: the 'bigdata', 'mes' and 'zz-mes' receivers referenced by the routes above
  # must be defined analogously (definitions omitted here), or Alertmanager will refuse to start.
EOF
```
The email template referenced by the receivers:

```bash
cat << 'EOF' > /data/server/alertmanager/templates/cadvisor.tmpl
{{ define "cadvisor.html" }}
<table border="5">
  <tr>
    <td>Alert</td>
    <td>Status</td>
    <td>Service</td>
    <td>Instance</td>
    <td>Description</td>
    <td>Start time</td>
  </tr>
  {{ range $i, $alert := .Alerts }}
  <tr>
    <td>{{ index $alert.Labels "alertname" }}</td>
    <td>{{ $alert.Status }}</td>
    <td>{{ index $alert.Labels "name" }}</td>
    <td>{{ index $alert.Labels "instance" }}</td>
    <td>{{ $alert.Annotations.description }}</td>
    <td>{{ $alert.StartsAt }}</td>
  </tr>
  {{ end }}
</table>
{{ end }}
EOF
```
Start Alertmanager:

```bash
docker run -d --net host \
  -v /data/server/alertmanager:/etc/alertmanager \
  -v /data/server/alertmanager/data:/alertmanager \
  --name alertmanager prom/alertmanager \
  --config.file="/etc/alertmanager/alertmanager.yml" \
  --storage.path=/alertmanager
```
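The whole notification path can be exercised without waiting for a real incident by posting a synthetic alert to Alertmanager's v1 API (the label values below are assumptions chosen to hit the routes and the template fields above):

```bash
# Fire a fake alert carrying a team label that the routing tree matches
curl -XPOST http://192.168.16.148:9093/api/v1/alerts -d '[
  {
    "labels": {"alertname": "TestAlert", "team": "yunwei", "name": "test", "instance": "test"},
    "annotations": {"description": "synthetic alert to test routing and the email template"}
  }
]'
```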
Now write the Grafana configuration. The heredoc delimiter is quoted so that `${HOSTNAME}` in the file is not expanded by the shell:

```bash
cat << 'EOF' > /data/server/grafana/grafana.ini
##################### Grafana Configuration Example #####################
#
# Everything has defaults so you only need to uncomment things you want to
# change
# possible values : production, development
;app_mode = production
# instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty
;instance_name = ${HOSTNAME}
#################################### Paths ####################################
[paths]
# Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used)
;data = /var/lib/grafana
# Directory where grafana can store logs
;logs = /var/log/grafana
# Directory where grafana will automatically scan and look for plugins
;plugins = /var/lib/grafana/plugins
# folder that contains provisioning config files that grafana will apply on startup and while running.
;provisioning = conf/provisioning
#################################### Server ####################################
[server]
# Protocol (http, https, socket)
protocol = http
# The ip address to bind to, empty will bind to all interfaces
;http_addr =
# The http port to use
http_port = 3000
# The public facing domain name used to access grafana from a browser
;domain = localhost
# Redirect to correct domain if host header does not match domain
# Prevents DNS rebinding attacks
;enforce_domain = false
# The full public facing url you use in browser, used for redirects and emails
# If you use reverse proxy and sub path specify full url (with sub path)
;root_url = http://localhost:3000
# Log web requests
;router_logging = false
# the path relative working path
;static_root_path = public
# enable gzip
;enable_gzip = false
# https certs & key file
;cert_file =
;cert_key =
# Unix socket path
;socket =
#################################### Database ####################################
[database]
# You can configure the database connection by specifying type, host, name, user and password
# as separate properties or as on string using the url properties.
# Either "mysql", "postgres" or "sqlite3", it's your choice
type = mysql
host = 192.168.16.102:3306
name = grafana
user = root
# If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;"""
password = 123456
# Use either URL or the previous fields to configure the database
# Example: mysql://user:secret@host:port/database
;url =
# For "postgres" only, either "disable", "require" or "verify-full"
;ssl_mode = disable
# For "sqlite3" only, path relative to data_path setting
;path = grafana.db
# Max idle conn setting default is 2
;max_idle_conn = 2
# Max conn setting default is 0 (mean not set)
;max_open_conn =
# Connection Max Lifetime default is 14400 (means 14400 seconds or 4 hours)
;conn_max_lifetime = 14400
# Set to true to log the sql calls and execution times.
log_queries =
#################################### Session ####################################
[session]
# Either "memory", "file", "redis", "mysql", "postgres", default is "file"
;provider = file
# Provider config options
# memory: not have any config yet
# file: session dir path, is relative to grafana data_path
# redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=grafana`
# mysql: go-sql-driver/mysql dsn config string, e.g. `user:password@tcp(127.0.0.1:3306)/database_name`
# postgres: user=a password=b host=localhost port=5432 dbname=c sslmode=disable
provider_config = root:123456@tcp(192.168.16.102:3306)/grafana
# Session cookie name
cookie_name = grafana_sess
# If you use session in https only, default is false
cookie_secure = false
# Session life time, default is 86400
session_life_time = 86400
#################################### Data proxy ###########################
[dataproxy]
# This enables data proxy logging, default is false
;logging = false
#################################### Analytics ####################################
[analytics]
# Server reporting, sends usage counters to stats.grafana.org every 24 hours.
# No ip addresses are being tracked, only simple counters to track
# running instances, dashboard and error counts. It is very helpful to us.
# Change this option to false to disable reporting.
;reporting_enabled = true
# Set to false to disable all checks to https://grafana.net
# for new versions (grafana itself and plugins), check is used
# in some UI views to notify that grafana or plugin update exists
# This option does not cause any auto updates, nor send any information
# only a GET request to http://grafana.com to get latest versions
;check_for_updates = true
# Google Analytics universal tracking code, only enabled if you specify an id here
;google_analytics_ua_id =
#################################### Security ####################################
[security]
# default admin user, created on startup
;admin_user = admin
# default admin password, can be changed before first start of grafana, or in profile settings
;admin_password = admin
# used for signing
;secret_key = SW2YcwTIb9zpOOhoPsMm
# Auto-login remember days
;login_remember_days = 7
;cookie_username = grafana_user
;cookie_remember_name = grafana_remember
# disable gravatar profile images
;disable_gravatar = false
# data source proxy whitelist (ip_or_domain:port separated by spaces)
;data_source_proxy_whitelist =
# disable protection against brute force login attempts
;disable_brute_force_login_protection = false
#################################### Snapshots ###########################
[snapshots]
# snapshot sharing options
;external_enabled = true
;external_snapshot_url = https://snapshots-origin.raintank.io
;external_snapshot_name = Publish to snapshot.raintank.io
# remove expired snapshot
;snapshot_remove_expired = true
#################################### Dashboards History ##################
[dashboards]
# Number dashboard versions to keep (per dashboard). Default: 20, Minimum: 1
;versions_to_keep = 20
#################################### Users ###############################
[users]
# disable user signup / registration
;allow_sign_up = true
# Allow non admin users to create organizations
;allow_org_create = true
# Set to true to automatically assign new users to the default organization (id 1)
;auto_assign_org = true
# Default role new users will be automatically assigned (if disabled above is set to true)
;auto_assign_org_role = Viewer
# Background text for the user field on the login page
;login_hint = email or username
# Default UI theme ("dark" or "light")
;default_theme = dark
# External user management, these options affect the organization users view
;external_manage_link_url =
;external_manage_link_name =
;external_manage_info =
# Viewers can edit/inspect dashboard settings in the browser. But not save the dashboard.
;viewers_can_edit = false
[auth]
# Set to true to disable (hide) the login form, useful if you use OAuth, defaults to false
;disable_login_form = false
# Set to true to disable the signout link in the side menu. useful if you use auth.proxy, defaults to false
;disable_signout_menu = false
# URL to redirect the user to after sign out
;signout_redirect_url =
#################################### Anonymous Auth ##########################
[auth.anonymous]
# enable anonymous access
;enabled = false
# specify organization name that should be used for unauthenticated users
;org_name = Main Org.
# specify role for unauthenticated users
;org_role = Viewer
#################################### Github Auth ##########################
[auth.github]
;enabled = false
;allow_sign_up = true
;client_id = some_id
;client_secret = some_secret
;scopes = user:email,read:org
;auth_url = https://github.com/login/oauth/authorize
;token_url = https://github.com/login/oauth/access_token
;api_url = https://api.github.com/user
;team_ids =
;allowed_organizations =
#################################### Google Auth ##########################
[auth.google]
;enabled = false
;allow_sign_up = true
;client_id = some_client_id
;client_secret = some_client_secret
;scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email
;auth_url = https://accounts.google.com/o/oauth2/auth
;token_url = https://accounts.google.com/o/oauth2/token
;api_url = https://www.googleapis.com/oauth2/v1/userinfo
;allowed_domains =
#################################### Generic OAuth ##########################
[auth.generic_oauth]
;enabled = false
;name = OAuth
;allow_sign_up = true
;client_id = some_id
;client_secret = some_secret
;scopes = user:email,read:org
;auth_url = https://foo.bar/login/oauth/authorize
;token_url = https://foo.bar/login/oauth/access_token
;api_url = https://foo.bar/user
;team_ids =
;allowed_organizations =
#################################### Grafana.com Auth ####################
[auth.grafana_com]
;enabled = false
;allow_sign_up = true
;client_id = some_id
;client_secret = some_secret
;scopes = user:email
;allowed_organizations =
#################################### Auth Proxy ##########################
[auth.proxy]
;enabled = false
;header_name = X-WEBAUTH-USER
;header_property = username
;auto_sign_up = true
;ldap_sync_ttl = 60
;whitelist = 192.168.1.1, 192.168.2.1
#################################### Basic Auth ##########################
[auth.basic]
;enabled = true
#################################### Auth LDAP ##########################
[auth.ldap]
;enabled = false
;config_file = /etc/grafana/ldap.toml
;allow_sign_up = true
#################################### SMTP / Emailing ##########################
[smtp]
;enabled = false
;host = localhost:25
;user =
# If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;"""
;password =
;cert_file =
;key_file =
;skip_verify = false
;from_address = [email protected]
;from_name = Grafana
# EHLO identity in SMTP dialog (defaults to instance_name)
;ehlo_identity = dashboard.example.com
[emails]
;welcome_email_on_sign_up = false
#################################### Logging ##########################
[log]
# Either "console", "file", "syslog". Default is console and file
# Use space to separate multiple modes, e.g. "console file"
;mode = console file
# Either "debug", "info", "warn", "error", "critical", default is "info"
;level = info
# optional settings to set different levels for specific loggers. Ex filters = sqlstore:debug
;filters =
# For "console" mode only
[log.console]
;level =
# log line format, valid options are text, console and json
;format = console
# For "file" mode only
[log.file]
;level =
# log line format, valid options are text, console and json
;format = text
# This enables automated log rotate(switch of following options), default is true
;log_rotate = true
# Max line number of single file, default is 1000000
;max_lines = 1000000
# Max size shift of single file, default is 28 means 1 << 28, 256MB
;max_size_shift = 28
# Segment log daily, default is true
;daily_rotate = true
# Expired days of log file(delete after max days), default is 7
;max_days = 7
[log.syslog]
;level =
# log line format, valid options are text, console and json
;format = text
# Syslog network type and address. This can be udp, tcp, or unix. If left blank, the default unix endpoints will be used.
;network =
;address =
# Syslog facility. user, daemon and local0 through local7 are valid.
;facility =
# Syslog tag. By default, the process' argv[0] is used.
;tag =
#################################### Alerting ############################
[alerting]
# Disable alerting engine & UI features
;enabled = true
# Makes it possible to turn off alert rule execution but alerting UI is visible
;execute_alerts = true
#################################### Explore #############################
[explore]
# Enable the Explore section
;enabled = false
#################################### Internal Grafana Metrics ##########################
# Metrics available at HTTP API Url /metrics
[metrics]
# Disable / Enable internal metrics
;enabled = true
# Publish interval
;interval_seconds = 10
# Send internal metrics to Graphite
[metrics.graphite]
# Enable by setting the address setting (ex localhost:2003)
;address =
;prefix = prod.grafana.%(instance_name)s.
#################################### Distributed tracing ############
[tracing.jaeger]
# Enable by setting the address sending traces to jaeger (ex localhost:6831)
;address = localhost:6831
# Tag that will always be included in when creating new spans. ex (tag1:value1,tag2:value2)
;always_included_tag = tag1:value1
# Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote
;sampler_type = const
# jaeger samplerconfig param
# for "const" sampler, 0 or 1 for always false/true respectively
# for "probabilistic" sampler, a probability between 0 and 1
# for "rateLimiting" sampler, the number of spans per second
# for "remote" sampler, param is the same as for "probabilistic"
# and indicates the initial sampling rate before the actual one
# is received from the mothership
;sampler_param = 1
#################################### Grafana.com integration ##########################
# Url used to to import dashboards directly from Grafana.com
[grafana_com]
;url = https://grafana.com
#################################### External image storage ##########################
[external_image_storage]
# Used for uploading images to public servers so they can be included in slack/email messages.
# you can choose between (s3, webdav, gcs, azure_blob, local)
;provider =
[external_image_storage.s3]
;bucket =
;region =
;path =
;access_key =
;secret_key =
[external_image_storage.webdav]
;url =
;public_url =
;username =
;password =
[external_image_storage.gcs]
;key_file =
;bucket =
;path =
[external_image_storage.azure_blob]
;account_name =
;account_key =
;container_name =
[external_image_storage.local]
# does not require any configuration
[auth.ldap]
enabled = true
config_file = /etc/grafana/ldap.toml
EOF
```
Then the LDAP settings referenced by grafana.ini:

```bash
cat << 'EOF' > /data/server/grafana/ldap.toml
[[servers]]
host = "ldap.alongparty.cn"
port = 389
# root_ca_cert = "/path/to/certificate.crt"   # no CA certificate configured
# Search user bind dn
bind_dn = 'cn=admin,dc=baibu,dc=la'
# Search user bind password
bind_password = "kbsonlong"
# log in with uid
search_filter = "(uid=%s)"
# search_filter = "(mail=%s)"   # log in with email instead, e.g. [email protected]
search_base_dns = ['ou=People,dc=baibu,dc=la']
group_search_filter = "(&(objectClass=posixGroup)(memberUid=%s))"
#group_search_base_dns = ["ou=Groups,dc=baibu,dc=la"]
#group_search_filter_user_attribute = "uid"
[servers.attributes]
name = "cn"
surname = "sn"
username = "cn"
email = "mail"
member_of = "memberOf"
# ops group: mapped to org Admin and Grafana server admin
[[servers.group_mappings]]
group_dn = "cn=yunweizu,ou=Group,dc=baibu,dc=la"
org_role = "Admin"
grafana_admin = true
[[servers.group_mappings]]
# everyone else is a Viewer in this org
group_dn = "*"
org_role = "Viewer"
EOF
```
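The bind DN, password, and search filter can be sanity-checked with ldapsearch before pointing Grafana at LDAP (OpenLDAP client tools assumed; 'someuser' is a placeholder uid):

```bash
# Reproduce Grafana's lookup: bind as the search user, then search by uid
ldapsearch -x -H ldap://ldap.alongparty.cn:389 \
  -D 'cn=admin,dc=baibu,dc=la' -w 'kbsonlong' \
  -b 'ou=People,dc=baibu,dc=la' '(uid=someuser)'
```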
Finally, start Grafana:

```bash
docker run -d --name grafana --net=host --restart=always \
  --log-driver json-file --log-opt max-size=10m --log-opt max-file=7 \
  -e TZ="Asia/Shanghai" \
  -e "GF_SERVER_ROOT_URL=http://grafana.server.name" \
  -e "GF_SECURITY_ADMIN_PASSWORD=newpwd" \
  -v "/data/server/grafana/data:/var/lib/grafana" \
  -v "/data/server/grafana/logs:/var/log/grafana" \
  -v "/data/server/grafana/grafana.ini:/etc/grafana/grafana.ini" \
  -v "/data/server/grafana/ldap.toml:/etc/grafana/ldap.toml" \
  grafana/grafana
```
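Once Grafana is up, the Prometheus data source can be added through the HTTP API instead of the UI (admin password as set via GF_SECURITY_ADMIN_PASSWORD above):

```bash
# Register Prometheus as the default data source
curl -X POST http://admin:[email protected]:3000/api/datasources \
  -H 'Content-Type: application/json' \
  -d '{"name":"Prometheus","type":"prometheus","url":"http://192.168.16.148:9090","access":"proxy","isDefault":true}'
```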