Prometheus 官网下载
Prometheus 开始安装
Prometheus 后台运行
Prometheus 基本配置文件
安装第一个 exporter > node_exporter
Prometheus 的时间序列(Time Series)数据对系统时间的准确性要求很高,必须确保本机时间同步
timedatectl set-timezone Asia/Shanghai
下载最新版本官网:https://prometheus.io/download/#prometheus
wget https://github.com/prometheus/prometheus/releases/download/v2.19.2/prometheus-2.19.2.linux-amd64.tar.gz
[root@k8s-master ~]# tar -zxvf prometheus-2.19.2.linux-amd64.tar.gz
prometheus-2.19.2.linux-amd64/
prometheus-2.19.2.linux-amd64/console_libraries/
prometheus-2.19.2.linux-amd64/console_libraries/menu.lib
prometheus-2.19.2.linux-amd64/console_libraries/prom.lib
prometheus-2.19.2.linux-amd64/LICENSE
prometheus-2.19.2.linux-amd64/consoles/
prometheus-2.19.2.linux-amd64/consoles/prometheus.html
prometheus-2.19.2.linux-amd64/consoles/node-cpu.html
prometheus-2.19.2.linux-amd64/consoles/node-disk.html
prometheus-2.19.2.linux-amd64/consoles/node.html
prometheus-2.19.2.linux-amd64/consoles/prometheus-overview.html
prometheus-2.19.2.linux-amd64/consoles/index.html.example
prometheus-2.19.2.linux-amd64/consoles/node-overview.html
prometheus-2.19.2.linux-amd64/prometheus
prometheus-2.19.2.linux-amd64/promtool
prometheus-2.19.2.linux-amd64/NOTICE
prometheus-2.19.2.linux-amd64/tsdb
prometheus-2.19.2.linux-amd64/prometheus.yml
[root@k8s-master ~]# cp -rf prometheus-2.19.2.linux-amd64 /usr/local/prometheus
[root@k8s-master prometheus]# ./prometheus
在 /usr/local/prometheus 目录下启动,但进程会在前台运行并持续输出日志、占用当前终端,因此采用 screen 的方法在后台运行。
level=info ts=2020-07-05T04:16:30.237Z caller=main.go:302 msg="No time or size retention was set so using the default time retention" duration=15d
level=info ts=2020-07-05T04:16:30.237Z caller=main.go:337 msg="Starting Prometheus" version="(version=2.19.2, branch=HEAD, revision=c448ada63d83002e9c1d2c9f84e09f55a61f0ff7)"
level=info ts=2020-07-05T04:16:30.237Z caller=main.go:338 build_context="(go=go1.14.4, user=root@dd72efe1549d, date=20200626-09:02:20)"
level=info ts=2020-07-05T04:16:30.237Z caller=main.go:339 host_details="(Linux 5.7.0-1.el7.elrepo.x86_64 #1 SMP Sun May 31 17:27:48 EDT 2020 x86_64 k8s-master (none))"
level=info ts=2020-07-05T04:16:30.237Z caller=main.go:340 fd_limits="(soft=1024, hard=4096)"
level=info ts=2020-07-05T04:16:30.237Z caller=main.go:341 vm_limits="(soft=unlimited, hard=unlimited)"
level=info ts=2020-07-05T04:16:30.238Z caller=main.go:678 msg="Starting TSDB ..."
level=info ts=2020-07-05T04:16:30.240Z caller=web.go:524 component=web msg="Start listening for connections" address=0.0.0.0:9090
level=info ts=2020-07-05T04:16:30.577Z caller=head.go:645 component=tsdb msg="Replaying WAL and on-disk memory mappable chunks if any, this may take a while"
level=info ts=2020-07-05T04:16:30.577Z caller=head.go:706 component=tsdb msg="WAL segment loaded" segment=0 maxSegment=0
level=info ts=2020-07-05T04:16:30.577Z caller=head.go:709 component=tsdb msg="WAL replay completed" duration=100.17µs
level=info ts=2020-07-05T04:16:30.578Z caller=main.go:694 fs_type=XFS_SUPER_MAGIC
level=info ts=2020-07-05T04:16:30.578Z caller=main.go:695 msg="TSDB started"
level=info ts=2020-07-05T04:16:30.578Z caller=main.go:799 msg="Loading configuration file" filename=prometheus.yml
level=info ts=2020-07-05T04:16:30.579Z caller=main.go:827 msg="Completed loading of configuration file" filename=prometheus.yml
level=info ts=2020-07-05T04:16:30.579Z caller=main.go:646 msg="Server is ready to receive web requests."
yum -y install screen 安装 screen 工具
screen -S prometheus 创建一个会话
./prometheus 在会话中运行启动 prometheus,然后断开 xshell 连接,再重新开一个新的终端。
screen -ls 查看会话, screen -r ID号 进入会话
http://192.168.168.11:9090/graph
# my global config
global:
scrape_interval: 15s # 数据采集的时间间隔,此处设置为 15 秒(未设置时 Prometheus 默认为 1 分钟),可自定义数据采集频率
evaluation_interval: 15s # 监控数据规则的评估频率,这个参数是 prometheus 多长时间会进行一次 监控规则的评估。
#举个例子:假如设置 当内存使用量 >70% 时发出报警这么一条 rule(规则),
#那么 prometheus 会默认每 15秒 来执行一次这个规则,检查内存的情况。
# Alertmanager configuration 配置 Alertmanager
alerting:
alertmanagers:
- static_configs:
- targets:
# - alertmanager(127.0.0.1):9093 #Alertmanager 服务器IP地址
rule_files:
# - "first_rules.yml" #第一个告警规则文件
# - "second_rules.yml" #第二个告警规则文件
# Here it's Prometheus itself.
scrape_configs: #抓取数据的配置
# The job name is added as a label `job=` to any timeseries scraped from this config.
- job_name: 'prometheus' #任务名称
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
static_configs: #监控项的信息
- targets: ['localhost:9090'] #定义监控项
# - targets: ['localhost:9090','server01:9100','server02:9100'] #定义多个监控项,需要重启prometheus 才能对加入节点抓取数据。
#server*必须在/etc/hosts中配置解析名。
#9100 node_exporter 的默认端口。
官网exporter下载地址:https://prometheus.io/download/#node_exporter
node_exporter - Github 地址:https://github.com/prometheus/node_exporter
tar -zxvf node_exporter-1.0.1.linux-amd64.tar.gz
cp -rf node_exporter-1.0.1.linux-amd64 /usr/local/node_exporter ; cd /usr/local/node_exporter/
screen -S node_export
./node_exporter 开启exporter
ss -anpt | grep node
curl localhost:9100/metrics
static_configs:
- targets: ['localhost:9090','192.168.168.12:9100']
重启 prometheus,刷新 prometheus - webUI
Github 地址:https://github.com/prometheus/node_exporter
node_exporter 的大部分采集器(collector)是默认开启的,也有一部分不是默认开启的
不是默认开启的采集器,需要在启动时显式指定才能启用,如下:
[root@k8s-node1 node_export]# ./node_exporter --help ##加上 --help,列出所有监控项
[root@k8s-node1 node_export]# ./node_exporter --collector.ipvs ##跟上想要开启的监控项启动
基础篇,看不懂正常
[root@k8s-node1 ~]# curl localhost:9100/metrics | grep -i node_cpu
node_cpu_seconds_total{cpu="0",mode="idle"} 9208.76 摘出其中一个…
(1-((sum(increase(node_cpu_seconds_total{mode="idle"}[3m])) by (instance)) /(sum(increase(node_cpu_seconds_total[3m])) by (instance)))) * 100