使用 Prometheus 监控 node_exporter 节点时,如果应用很多且上下线频繁,就需要不断修改 Prometheus 的配置文件,不够灵活。可以为 Prometheus 配置注册中心(如 Consul),由 Prometheus 从注册中心自动发现应用并拉取监控数据。
# Install the Consul binary. The archive consul_1.9.17_linux_amd64.zip must
# already be present in /usr/local/consul/ before unzip is run.
mkdir -p /usr/local/consul/
cd /usr/local/consul/
unzip consul_1.9.17_linux_amd64.zip
cp consul /usr/local/bin/
# Dedicated system group and non-login system user for Consul.
sudo groupadd --system consul
sudo useradd -s /sbin/nologin --system -g consul consul
# Configuration, data and log directories, owned by the consul account.
mkdir -p /data/consul/{config,data,logs}
# Use "user:group"; the "user.group" form is obsolescent in GNU chown.
chown -R consul:consul /data/consul
chmod -R 775 /data/consul
# Write the Consul agent configuration into the config directory created above.
# JSON has no comment syntax, so the settings are explained here rather than
# inside the file:
#   advertise_addr / bind_addr : IP address of this host
#   bootstrap_expect           : number of server nodes
#   datacenter                 : datacenter name
#   node_name                  : name of this node (use a distinct name per node)
#   data_dir                   : where Consul stores its data
#   ports.dns                  : Consul DNS port
#   log_file                   : log file path
#   log_rotate_bytes           : log size that triggers rotation
#   log_rotate_max_files       : maximum number of rotated log files kept
#   encrypt                    : gossip encryption key.
#                                SECURITY: this is a published sample key; generate
#                                your own with `consul keygen` for real deployments.
# A single-node deployment needs no join list. For a multi-node cluster add the
# node names to the configuration, e.g.:
#   "retry_join": ["huaconsul1"],
#   "start_join": ["huaconsul1"],
cat >/data/consul/config/consul.json<<\EOF
{
  "advertise_addr": "10.0.0.42",
  "bind_addr": "10.0.0.42",
  "bootstrap_expect": 1,
  "client_addr": "0.0.0.0",
  "datacenter": "GuiZhou1",
  "node_name": "huaconsul1",
  "data_dir": "/data/consul/data",
  "enable_script_checks": true,
  "disable_update_check": true,
  "domain": "consul",
  "dns_config": {
    "enable_truncate": true,
    "only_passing": true
  },
  "connect": {
    "enabled": true
  },
  "ports": {
    "dns": 8600
  },
  "performance": {
    "raft_multiplier": 1
  },
  "enable_syslog": false,
  "log_file": "/data/consul/logs/consul.log",
  "log_rotate_bytes": 10485760,
  "log_rotate_max_files": 7,
  "log_rotate_duration": "24h",
  "log_json": true,
  "encrypt": "m51qMazqGYVYSfJJLBn09jcMPMXHJ5vqfkhu5MO5L84=",
  "leave_on_terminate": true,
  "log_level": "debug",
  "rejoin_after_leave": true,
  "server": true,
  "ui": true,
  "ui_config": {
    "enabled": true
  }
}
EOF
# Start command: run the agent in the background with the configuration file
# written above. nohup appends the agent's output to nohup.out in the current
# directory.
nohup /usr/local/bin/consul agent -config-file=/data/consul/config/consul.json &
# Follow the log and check it for errors (tail -f replaces the removed tailf)
tail -f nohup.out
# Check which ports consul is listening on
netstat -antpl |grep consul
1.9 在本地浏览器访问 Consul Web UI:
http://10.0.0.42:8500/
# Unpack node_exporter 1.5.0 under /usr/local, rename the extracted directory
# to a version-independent name, and enter it.
ne_pkg=node_exporter-1.5.0.linux-amd64
cd /usr/local
tar -xf "${ne_pkg}.tar.gz"
mv "${ne_pkg}" node_exporter
cd node_exporter
# Create the start script. The quoted delimiter (\EOF) writes the body verbatim.
cat >start.sh<<\EOF
#!/bin/bash
# Run node_exporter in the background with the tcpstat collector enabled;
# nohup appends its output to nohup.out in the current directory.
nohup /usr/local/node_exporter/node_exporter --collector.tcpstat &
EOF
sh start.sh
# Follow the node_exporter log (tail -f replaces the removed tailf)
tail -f nohup.out
# Query the node_exporter metrics endpoint with curl
curl 10.0.0.41:9100/metrics
mkdir -p /usr/local/reg
cd /usr/local/reg
# Generate reg.sh. The quoted delimiter (\EOF) keeps every $variable literal,
# so it is expanded when reg.sh runs, not while the file is being written.
cat >/usr/local/reg/reg.sh<<\EOF
#!/bin/bash
# Register this host's node_exporter (port 9100) as a service in Consul.

# Consul HTTP API address.
CONSUL_ADDRESS="10.0.0.42:8500"
# Host name; used as the Consul service id.
HOST=$(hostname)
# Physical NIC name(s): entries of /sys/class/net that are not listed under
# /sys/devices/virtual/net.
machine_physics_net=$(ls /sys/class/net/ | grep -v "$(ls /sys/devices/virtual/net/)")
# Global-scope address on those NICs with the /prefix stripped. Only the first
# match is kept so the JSON payload stays a single well-formed value.
# NOTE(review): on multi-homed hosts confirm the first address is the one
# Prometheus should scrape.
IP=$(ip addr | grep "$machine_physics_net" | awk '/^[0-9]+: / {}; /inet.*global/ {print gensub(/(.*)\/(.*)/, "\\1", "g", $2)}' | head -n 1)
if [ -z "$IP" ]; then
  printf '%s\n' "reg.sh: could not determine a global IP address" >&2
  exit 1
fi
# Short host name: everything before the first dot.
server_name=${HOST%%.*}
# Build the registration payload; printf keeps the values out of shell
# word-splitting.
payload=$(printf '{"id": "%s","name": "GuiZhou-Linux-Pool","address": "%s","port": 9100,"tags": ["linux_system"],"meta": {"server_name": "%s", "machine_ip":"%s", "monitoring-items": "linux_base"}, "checks": [{"http": "http://%s:9100/metrics", "interval": "5s"}]}' "$HOST" "$IP" "$server_name" "$IP" "$IP")
# --fail makes curl return non-zero on HTTP errors, so success is only
# reported when Consul actually accepted the registration.
if curl --fail --silent --show-error -X PUT -d "$payload" "http://${CONSUL_ADDRESS}/v1/agent/service/register" > /dev/null; then
  printf '\033[92m %s 注册成功 \033[0m\n' "$HOST"
else
  printf '\033[91m %s 注册失败 \033[0m\n' "$HOST" >&2
  exit 1
fi
EOF
# Generate der.sh, which removes this host's service registration from Consul.
cat >der.sh<<\EOF
#!/bin/bash
# Deregister this host's service from Consul. The service id is the host name,
# the same value reg.sh sends as "id".
CONSUL_ADDRESS="10.0.0.42:8500"
HOST=$(hostname)
# --fail makes curl return non-zero on HTTP errors, so "deleted" is only
# reported when the request actually succeeded.
if curl --fail --silent --show-error -X PUT "http://${CONSUL_ADDRESS}/v1/agent/service/deregister/${HOST}"; then
  printf '\033[92m %s 已删除 \033[0m\n' "$HOST"
else
  printf '\033[91m %s 删除失败 \033[0m\n' "$HOST" >&2
  exit 1
fi
EOF
# Run the registration script so the node is registered in Consul for service
# discovery. reg.sh is a bash script, so run it with bash rather than sh.
bash reg.sh
# Note: http://10.0.0.42:8500/v1/agent/service/register is the Consul API endpoint used for registration.
# Manual deregistration (run on the registered node, and only when removing it).
# The last path segment is the service id, which reg.sh sets to the host name.
curl --request PUT "http://10.0.0.42:8500/v1/agent/service/deregister/$(hostname)"
# Unpack Prometheus 2.37.1 under /usr/local, rename the extracted directory to
# a version-independent name, and enter it.
prom_pkg=prometheus-2.37.1.linux-amd64
cd /usr/local
tar -xf "${prom_pkg}.tar.gz"
mv "${prom_pkg}" prometheus
cd prometheus
# Edit the scrape configuration
vi prometheus.yml
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"  # default job: the local Prometheus instance
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]  # local instance, no change needed (everything above ships with Prometheus)
  - job_name: 'gz-linux-pool'  # job for the nodes discovered through Consul
    scrape_interval: 15s
    scrape_timeout: 5s
    consul_sd_configs:  # Consul-based service discovery
      - server: '10.0.0.42:8500'  # Consul API address that Prometheus queries
        refresh_interval: 30s
        # Must match the service "name" used at registration (GuiZhou-Linux-Pool in reg.sh).
        services: [GuiZhou-Linux-Pool]
    relabel_configs:  # custom relabeling of the discovered metadata
      # NOTE(review): reg.sh only sets the meta keys server_name, machine_ip and
      # monitoring-items; the instance/group/environment/Project keys read below
      # are not set by it - confirm they are registered elsewhere or adjust.
      - source_labels: [__meta_consul_service_address]
        target_label: 'ipaddress'
      - source_labels: [__meta_consul_service_metadata_instance]
        target_label: 'instance'
      - source_labels: [__meta_consul_service_metadata_group]
        target_label: 'group'
      - source_labels: [__meta_consul_service_metadata_environment]
        target_label: 'environment'
      - source_labels: [__meta_consul_service_metadata_Project]
        target_label: 'Project'
      - source_labels: [__meta_consul_service]
        target_label: 'service'
# Create the Prometheus start script. The quoted delimiter (\EOF) writes the
# body verbatim, so $PROMETHEUS_PATH is expanded when start.sh runs.
cat >start.sh<<\EOF
#!/bin/bash
# Start Prometheus in the background; nohup appends its output to nohup.out in
# the current directory.
# PROMETHEUS_PATH is the install directory the unpacked release was renamed to.
PROMETHEUS_PATH=/usr/local/prometheus
# NOTE(review): --web.external-url=prometheus carries a path component, which
# presumably changes the URL prefix the web endpoints are served under - confirm
# before relying on http://<host>:9090/-/reload with this start method.
nohup "$PROMETHEUS_PATH/prometheus" --config.file="$PROMETHEUS_PATH/prometheus.yml" \
--storage.tsdb.retention.time=7d \
--query.timeout=2m \
--web.read-timeout=5m \
--web.enable-lifecycle \
--web.external-url=prometheus &
EOF
sh start.sh
# Alternatively, instead of start.sh, run Prometheus as a systemd service:
# Set ownership ("user:" form; the "user." form is obsolescent in GNU chown)
chown -R root: /usr/local/prometheus
# Add the Prometheus directory to PATH. The line is appended only if it is not
# already present, so repeating this step does not duplicate it.
{ grep -qxF 'export PATH=/usr/local/prometheus:$PATH' /etc/profile || echo 'export PATH=/usr/local/prometheus:$PATH' >> /etc/profile; } && source /etc/profile
# Create the systemd unit
# Write the systemd unit for Prometheus. The quoted delimiter (\EOF) keeps
# $MAINPID literal so systemd, not the shell, substitutes it.
# --storage.tsdb.retention.time is used instead of the deprecated
# --storage.tsdb.retention flag, matching the flag used in start.sh.
# ProtectSystem=full is combined with ReadWriteDirectories so that the
# Prometheus directory under /usr/local remains writable for the service.
cat >/usr/lib/systemd/system/prometheus.service<<\EOF
[Unit]
Description=Prometheus
Documentation=https://prometheus.io/
After=network.target
[Service]
Type=simple
User=root
ExecReload=/bin/kill -HUP $MAINPID
ExecStart=/usr/local/prometheus/prometheus --config.file=/usr/local/prometheus/prometheus.yml \
--storage.tsdb.path=/usr/local/prometheus/data \
--storage.tsdb.retention.time=15d \
--web.console.libraries=/usr/local/prometheus/console_libraries \
--web.console.templates=/usr/local/prometheus/consoles \
--web.listen-address=0.0.0.0:9090 \
--web.read-timeout=5m \
--web.max-connections=10 \
--query.max-concurrency=20 \
--query.timeout=2m \
--web.enable-lifecycle
PrivateTmp=true
PrivateDevices=true
ProtectHome=true
NoNewPrivileges=true
LimitNOFILE=infinity
ReadWriteDirectories=/usr/local/prometheus
ProtectSystem=full
SyslogIdentifier=prometheus
Restart=always
[Install]
WantedBy=multi-user.target
EOF
# Make systemd re-read its unit files so prometheus.service is picked up
systemctl daemon-reload
# Enable the service at boot
systemctl enable prometheus
# Start the service now
systemctl start prometheus
# Show the service state
systemctl status prometheus
# Check what is listening on port 9090
lsof -i :9090
# POST to the /-/reload endpoint
curl --request POST http://10.0.0.42:9090/-/reload