目录
- 前言
- 脚本
- 配置文件
- 模板文件
前言
之前写过一篇文章 Zabbix设置自定义监控项之——监控tcp连接状态。
最近完善了一点东西,也算是对第一个版本的升级。
升级的内容:
- 脚本通过 `python` 实现。
- 监控项类型为 `zabbix 采集器`,使用 `zabbix_sender` 发送数据到服务端。
- 增加了一个触发器,对 close_wait 状态进行预警。
脚本
`/var/lib/zabbix/` 这个目录需要事先手动创建,并确保运行脚本的用户对它有写权限(脚本会在其中写入临时文件)。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2020/3/12 9:29
# @Author : dongjiaxiao
# @Email :
# @File : tcp_connect_status_monitor.py
# @Desc :
import sys
import os
import subprocess
def execute_cmd(cmd):
    """
    Run a shell command and collect its exit status and output.

    :param cmd: command line to run; it is handed to the shell (shell=True),
        so pipes and quoting behave as they do in a terminal
    :return: dict with 'status' (the process return code, or 1 when the
        command could not be run at all), 'stdout' and 'stderr' (text with
        every newline removed and surrounding whitespace stripped)
    """
    def _clean(raw):
        # On Python 3 the pipes yield bytes, and bytes.replace('\n', '')
        # raises TypeError, so decode first. On Python 2 the data is
        # already str and is left untouched.
        if not isinstance(raw, str):
            raw = raw.decode('utf-8', 'replace')
        # Lines are joined without a separator, exactly as before; callers
        # rely on the padding that tools such as `uniq -c` put in front of
        # every line to keep tokens apart.
        return raw.replace('\n', '').strip()

    cmd_res = {'status': 1, 'stdout': '', 'stderr': ''}
    try:
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        raw_stdout, raw_stderr = proc.communicate()
        cmd_res['stdout'] = _clean(raw_stdout)
        cmd_res['stderr'] = _clean(raw_stderr)
        cmd_res['status'] = proc.returncode
    except Exception as e:
        # Best effort: report the failure through the result dict. The
        # message is stored as text so callers can concatenate or format it.
        cmd_res['status'] = 1
        cmd_res['stderr'] = str(e)
    return cmd_res
def tcp_connect_status_value(zabbix_conf_path, tcp_conn_status_cmd):
    """
    Count TCP connections per state and push the counts to the zabbix server
    with zabbix_sender. The receiving items must be of type "Zabbix trapper".

    :param zabbix_conf_path: path of the config file passed to ``zabbix_sender -c``
    :param tcp_conn_status_cmd: shell command whose output is a whitespace
        separated sequence in which every state name is preceded by its count
    :return: dict; 'status' is 1 when the data was sent and 0 otherwise,
        'error_info' holds the reason for a failure
    """
    # Every known TCP state starts at 0 so that states absent from the
    # command output are still reported.
    tcp_conn_status_dic = {
        'CLOSED': 0,
        'CLOSE_WAIT': 0,
        'CLOSING': 0,
        'ESTABLISHED': 0,
        'FIN_WAIT1': 0,
        'FIN_WAIT2': 0,
        'LAST_ACK': 0,
        'LISTEN': 0,
        'SYN_RECV': 0,
        'SYN_SENT': 0,
        'TIME_WAIT': 0,
    }
    # Input file for zabbix_sender; removed again before returning.
    tmp_file = "/var/lib/zabbix/zabbix_sender_tcp_connect_status_file.txt"
    send_data_flg = {'status': 0, 'error_info': ''}

    # Locate the zabbix_sender binary; without it nothing can be sent.
    sender_bin_ret = execute_cmd("which zabbix_sender")
    if sender_bin_ret['status'] != 0:
        # format() instead of "+" so a non-string stderr cannot raise TypeError.
        send_data_flg['error_info'] = "zabbix sender 未安装{0}".format(sender_bin_ret['stderr'])
        return send_data_flg
    zabbix_sender_bin = sender_bin_ret['stdout']

    # Collect the per-state connection counts.
    tcp_connect_status_ret = execute_cmd(tcp_conn_status_cmd)
    if tcp_connect_status_ret['status'] != 0:
        send_data_flg['error_info'] = "{0} 执行失败 ,result:{1}".format(
            tcp_conn_status_cmd, tcp_connect_status_ret['stderr'])
        return send_data_flg

    # The count sits directly in front of its state name, so walk the tokens
    # as adjacent pairs. Pairs whose first element is not a number are ignored.
    tokens = tcp_connect_status_ret['stdout'].split()
    for count, state in zip(tokens, tokens[1:]):
        if state in tcp_conn_status_dic and count.isdigit():
            tcp_conn_status_dic[state] = count

    try:
        # One "<host> <key> <value>" line per state; "-" as host makes
        # zabbix_sender take the host name from the config file.
        with open(tmp_file, mode='w') as f_zs:
            for status, number in tcp_conn_status_dic.items():
                f_zs.write("- custom.tcp.connect.status[{0}] {1} \n".format(status.lower(), number))
        sender_data_ret = execute_cmd(
            '{0} -c {1} -i {2}'.format(zabbix_sender_bin, zabbix_conf_path, tmp_file))
    except EnvironmentError as e:
        # Missing directory or no write permission: report it through the
        # result dict instead of crashing with a traceback.
        send_data_flg['error_info'] = "can not write {0}: {1}".format(tmp_file, e)
        return send_data_flg
    finally:
        # Remove the temp file on success and on failure alike.
        try:
            os.remove(tmp_file)
        except OSError:
            pass  # never created or already gone; nothing to clean up

    # Sent successfully only if the command succeeded and the server
    # reported "failed: 0".
    if sender_data_ret['status'] == 0 and "failed: 0" in sender_data_ret['stdout']:
        send_data_flg['status'] = 1
    else:
        send_data_flg['error_info'] = "{0} or failed is not eq 0 {1}".format(
            sender_data_ret['stdout'], sender_data_ret['stderr']).strip()
    return send_data_flg
if __name__ == '__main__':
    # Takes the last column of `netstat -ant` (the connection state) and
    # counts how often each value occurs.
    tcp_conn_status_cmd = "netstat -ant|awk '{print $NF}' |sort |uniq -c"
    # Config file handed to zabbix_sender. The stray trailing space that used
    # to be part of this literal has been removed.
    zabbix_conf_path = '/etc/zabbix/zabbix_agentd.conf'
    if len(sys.argv) == 2 and sys.argv[1] == "sender_data":
        # Send the counts and print 1 (sent) or 0 (failed); this printed
        # value is what the custom.tcp.connect.status item collects.
        sender_status = tcp_connect_status_value(zabbix_conf_path, tcp_conn_status_cmd)
        print(sender_status['status'])
        # When debugging, sender_status['error_info'] holds the failure reason.
    else:
        print("please use parameter sender_data")
配置文件
UserParameter=custom.tcp.connect.status,python /etc/zabbix/scripts/tcp_connect_status_monitor.py sender_data
模板文件
4.0
2020-03-13T10:24:49Z
Templates
Template_Linux_TCP_Connect_Status_Monitor
Template_Linux_TCP_Connect_Status_Monitor
Templates
TCP Connect Status
-
TCP Connect Status Sender Data Status
0
custom.tcp.connect.status
1m
90d
365d
0
3
0
0
0
0
0
TCP Connect Status
3s
200
1
0
0
0
0
0
0
0
-
TCP Connect Status CLOSED
2
custom.tcp.connect.status[closed]
0
90d
365d
0
3
0
0
0
0
0
TCP Connect Status
3s
200
1
0
0
0
0
0
0
0
-
TCP Connect Status CLOSE_WAIT
2
custom.tcp.connect.status[close_wait]
0
90d
365d
0
3
0
0
0
0
0
TCP Connect Status
3s
200
1
0
0
0
0
0
0
0
-
TCP Connect Status CLOSING
2
custom.tcp.connect.status[closing]
0
90d
365d
0
3
0
0
0
0
0
TCP Connect Status
3s
200
1
0
0
0
0
0
0
0
-
TCP Connect Status ESTABLISHED
2
custom.tcp.connect.status[established]
0
90d
365d
0
3
0
0
0
0
0
TCP Connect Status
3s
200
1
0
0
0
0
0
0
0
-
TCP Connect Status FIN_WAIT1
2
custom.tcp.connect.status[fin_wait1]
0
90d
365d
0
3
0
0
0
0
0
TCP Connect Status
3s
200
1
0
0
0
0
0
0
0
-
TCP Connect Status FIN_WAIT2
2
custom.tcp.connect.status[fin_wait2]
0
90d
365d
0
3
0
0
0
0
0
TCP Connect Status
3s
200
1
0
0
0
0
0
0
0
-
TCP Connect Status LAST_ACK
2
custom.tcp.connect.status[last_ack]
0
90d
365d
0
3
0
0
0
0
0
TCP Connect Status
3s
200
1
0
0
0
0
0
0
0
-
TCP Connect Status LISTEN
2
custom.tcp.connect.status[listen]
0
90d
365d
0
3
0
0
0
0
0
TCP Connect Status
3s
200
1
0
0
0
0
0
0
0
-
TCP Connect Status SYN_RECV
2
custom.tcp.connect.status[syn_recv]
0
90d
365d
0
3
0
0
0
0
0
TCP Connect Status
3s
200
1
0
0
0
0
0
0
0
-
TCP Connect Status SYN_SENT
2
custom.tcp.connect.status[syn_sent]
0
90d
365d
0
3
0
0
0
0
0
TCP Connect Status
3s
200
1
0
0
0
0
0
0
0
-
TCP Connect Status TIME_WAIT
2
custom.tcp.connect.status[time_wait]
0
90d
365d
0
3
0
0
0
0
0
TCP Connect Status
3s
200
1
0
0
0
0
0
0
0
{Template_Linux_TCP_Connect_Status_Monitor:custom.tcp.connect.status.count(#3,0,eq)}=3
1
{Template_Linux_TCP_Connect_Status_Monitor:custom.tcp.connect.status.last()}=1
Linux TCP Connect Status Sender Data Failed
0
0
3
0
1
{Template_Linux_TCP_Connect_Status_Monitor:custom.tcp.connect.status[close_wait].avg(1h)}>=500
0
There are too many TCP CLOSE_WAIT status
0
0
2
0
1
{Template_Linux_TCP_Connect_Status_Monitor:custom.tcp.connect.status[time_wait].last()}>10000
0
There are too many TCP TIME_WAIT status
0
0
4
0
0
TCP Status
900
200
0.0000
100.0000
1
1
0
1
0
0.0000
0.0000
0
0
0
0
0
0
C80000
0
2
0
-
Template_Linux_TCP_Connect_Status_Monitor
custom.tcp.connect.status[closed]
1
0
00C800
0
2
0
-
Template_Linux_TCP_Connect_Status_Monitor
custom.tcp.connect.status[close_wait]
2
0
0000C8
0
2
0
-
Template_Linux_TCP_Connect_Status_Monitor
custom.tcp.connect.status[closing]
3
0
C800C8
0
2
0
-
Template_Linux_TCP_Connect_Status_Monitor
custom.tcp.connect.status[established]
4
0
00C8C8
0
2
0
-
Template_Linux_TCP_Connect_Status_Monitor
custom.tcp.connect.status[fin_wait1]
5
0
C8C800
0
2
0
-
Template_Linux_TCP_Connect_Status_Monitor
custom.tcp.connect.status[fin_wait2]
6
0
C8C8C8
0
2
0
-
Template_Linux_TCP_Connect_Status_Monitor
custom.tcp.connect.status[last_ack]
7
0
960000
0
2
0
-
Template_Linux_TCP_Connect_Status_Monitor
custom.tcp.connect.status[listen]
8
0
009600
0
2
0
-
Template_Linux_TCP_Connect_Status_Monitor
custom.tcp.connect.status[syn_recv]
9
0
000096
0
2
0
-
Template_Linux_TCP_Connect_Status_Monitor
custom.tcp.connect.status[syn_sent]
10
0
960096
0
2
0
-
Template_Linux_TCP_Connect_Status_Monitor
custom.tcp.connect.status[time_wait]