wget https://github.com/prometheus/prometheus/releases/download/v2.45.2/prometheus-2.45.2.linux-amd64.tar.gz
tar -zxvf prometheus-2.45.2.linux-amd64.tar.gz -C /opt/
#创建一个用户
useradd -M -s /usr/sbin/nologin prometheus
root@promethues203:~# useradd -M -s /usr/sbin/nologin prometheus
root@promethues203:~# id prometheus
uid=1000(prometheus) gid=1000(prometheus) groups=1000(prometheus)
root@promethues203:~#
#授权
root@promethues203:~# chown prometheus:prometheus -R /opt/prometheus-2.45.2.linux-amd64/
#创建prometheus.service服务
```
cat >> /etc/systemd/system/prometheus.service << "EOF"
[Unit]
Description=Prometheus Server
Documentation=https://prometheus.io/docs/introduction/overview/
After=network-online.target
[Service]
Type=simple
User=prometheus
Group=prometheus
Restart=on-failure
ExecStart=/opt/prometheus-2.45.2.linux-amd64/prometheus \
--config.file=/opt/prometheus-2.45.2.linux-amd64/prometheus.yml \
--storage.tsdb.path=/opt/prometheus-2.45.2.linux-amd64/data \
--storage.tsdb.retention.time=60d \
--web.enable-lifecycle
[Install]
WantedBy=multi-user.target
EOF
```
systemctl start prometheus
#页面显示正常
prometheus:http://10.121.1.203:9090/graph
监控指标:http://10.121.1.203:9090/metrics
安装监控alertmanager
wget https://github.com/prometheus/alertmanager/releases/download/v0.26.0/alertmanager-0.26.0.linux-amd64.tar.gz
tar -zxvf alertmanager-0.26.0.linux-amd64.tar.gz -C /opt
chown prometheus:prometheus -R /opt/alertmanager-0.26.0.linux-amd64
#创建alertmanager.service服务
```
cat >> /etc/systemd/system/alertmanager.service << "EOF"
[Unit]
Description=Alertmanager
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
User=prometheus
Group=prometheus
ExecStart=/opt/alertmanager-0.26.0.linux-amd64/alertmanager \
--config.file=/opt/alertmanager-0.26.0.linux-amd64/alertmanager.yml \
--storage.path=/opt/alertmanager-0.26.0.linux-amd64/data
Restart=always
[Install]
WantedBy=multi-user.target
EOF
```
启动服务
systemctl start alertmanager
http://10.121.1.203:9093/
修改配置,把alertmanager 告警加入到prometheus中
/opt/prometheus-2.45.2.linux-amd64/prometheus.yml
```
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093
          #增加alertmanager的地址
          - 10.121.1.203:9093
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
  #增加警告文件名
  - "alert.yml"
```
增加触发器配置文件
```
cat > /opt/prometheus-2.45.2.linux-amd64/alert.yml <<"EOF"
groups:
- name: prometheus alert
  rules:
  #对任何实例超过30s无法联系的情况发出警告
  - alert: 服务告警
    expr: up == 0
    for: 30s
    labels:
      severity: critical
    annotations:
      instance: "{{ $labels.instance }}"
      description: "{{ $labels.job }} 服务关闭"
EOF
```
检查是否有问题
```
root@promethues203:/opt/prometheus-2.45.2.linux-amd64# ./promtool check config prometheus.yml
Checking prometheus.yml
SUCCESS: 1 rule files found
SUCCESS: prometheus.yml is valid prometheus config file syntax
Checking alert.yml
SUCCESS: 1 rules found
root@promethues203:/opt/prometheus-2.45.2.linux-amd64#
```
重启prometheus 热加载
```
systemctl restart prometheus
#重载
curl -X POST http://localhost:9090/-/reload
```
安装grafana
https://grafana.com/grafana/download
```
wget https://dl.grafana.com/enterprise/release/grafana-enterprise-10.2.3.linux-amd64.tar.gz
tar -zxvf grafana-enterprise-10.2.3.linux-amd64.tar.gz -C /opt/
chown prometheus:prometheus -R /opt/grafana-v10.2.3/
```
创建服务
```
cat >> /etc/systemd/system/grafana-server.service << "EOF"
[Unit]
Description=Grafana server
Documentation=http://docs.grafana.org
[Service]
Type=simple
User=prometheus
Group=prometheus
Restart=on-failure
ExecStart=/opt/grafana-v10.2.3/bin/grafana-server \
--config=/opt/grafana-v10.2.3/conf/defaults.ini \
--homepath=/opt/grafana-v10.2.3
[Install]
WantedBy=multi-user.target
EOF
```
systemctl start grafana-server
http://10.121.1.203:3000/login 默认账号/密码:admin/admin
安装node_exporter
```
wget https://github.com/prometheus/node_exporter/releases/download/v1.7.0/node_exporter-1.7.0.linux-amd64.tar.gz
tar -zxvf node_exporter-1.7.0.linux-amd64.tar.gz -C /opt/
```
chown prometheus:prometheus -R /opt/node_exporter-1.7.0.linux-amd64
node_exporter 服务
```
cat >> /etc/systemd/system/node_exporter.service << "EOF"
[Unit]
Description=node_exporter
Documentation=http://prometheus.io
After=network.target
[Service]
Type=simple
User=prometheus
Group=prometheus
ExecStart=/opt/node_exporter-1.7.0.linux-amd64/node_exporter
Restart=on-failure
[Install]
WantedBy=multi-user.target
EOF
```
systemctl daemon-reload
systemctl restart node_exporter
http://10.121.1.203:9100/metrics
修改prometheus 添加node_exporter
node_exporter
```
cat >> /opt/prometheus-2.45.2.linux-amd64/prometheus.yml <<"EOF"
  # node-exporter配置
  - job_name: "node_exporter"
    scrape_interval: 15s
    static_configs:
      - targets: ["10.121.1.203:9100"]
        labels:
          instance: Prometheus server
EOF
```
重载prometheus配置(热加载,无需重启)
curl -X POST http://10.121.1.203:9090/-/reload
grafana 添加数据源
```
Home > Connections > Data sources > prometheus 添加prometheus
```
添加node_exporter dashboard 模板
下载模板地址 : https://grafana.com/grafana/dashboards/1860-node-exporter-full
10.121.1.203:3000
导入路径 Home 》Dashboards 》Import dashboard