用python脚本做端口监控已经稳定跑了2年多了,换了2家公司都在用,监控效果很好,及时发现故障。中间更新了好几个版本,博客上还是旧的,更新下新版本的脚本。
数据库有2个表:port_monitor用于记录IP和端口信息,port_monitor_status用于记录告警状态和时间,用来判断是否需要发送告警。
加入多线程后,生产环境153台机器0.15秒就跑完了。之前一直都是单线程在跑,遇到一个端口不通的要等返回结果再进行下一个端口,故障端口多了跑起来就很慢。编程好难学。
用的是crontab跑脚本的,最短的轮询间隔是1分钟,可以用其他的定时任务工具跑更短的间隔。
# -*- coding: utf-8 -*-
from concurrent.futures import ThreadPoolExecutor as Pool
from concurrent.futures import as_completed
import time
import pymysql
import socket
import sys
import requests
import json
from datetime import datetime, timedelta
def msg(text, phone, robot):
    """Send a plain-text alert to an Enterprise WeChat (WeCom) robot webhook.

    Args:
        text: Message content.
        phone: Mobile number to @-mention in the message. A falsy value
            (``None`` or an empty string) results in no mention.
        robot: Full webhook URL the JSON payload is POSTed to.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    headers = {'Content-Type': 'application/json;charset=utf-8'}
    payload = {
        "msgtype": "text",
        "text": {
            "content": text,
            # The phone column is nullable; never send [None] as a mention
            "mentioned_mobile_list": [phone] if phone else [],
        },
    }
    # Bound the call so a hung webhook cannot stall the whole monitoring run
    requests.post(robot, json.dumps(payload), headers=headers, timeout=10)
def port_check(ip, port):
    """Attempt a TCP connection to ``ip:port`` and report the outcome.

    Args:
        ip: Host address to connect to.
        port: Port number, as an int or a numeric string.

    Returns:
        A tuple ``(result, ip, port)``. ``result`` is 0 when the connection
        succeeded and a non-zero error code otherwise; ``ip`` and ``port``
        are passed back unchanged so the caller can match results to targets.

    Raises:
        ValueError: If ``port`` cannot be converted to an int.
    """
    # The context manager closes the socket on every path, so repeated
    # checks do not leak file descriptors
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        # Connection timeout in seconds
        s.settimeout(8)
        try:
            result = s.connect_ex((ip, int(port)))
        except OSError as exc:
            # connect_ex still raises for problems such as name-resolution
            # failures; report them as a failed check instead of aborting
            result = exc.errno or -1
    return result, ip, port
def out_log(text):
    """Append ``text`` as one line to the monitor log file.

    The log path is the running script's path (``sys.argv[0]``) with its
    extension replaced by ``.log``.

    Args:
        text: The line to append; a trailing newline is added.
    """
    import os  # local import: the file's import header does not include os

    # splitext strips only the final extension, so paths such as
    # "./monitor.py" or directories containing dots keep their full stem
    log_path = f'{os.path.splitext(sys.argv[0])[0]}.log'
    with open(log_path, 'a', encoding='utf-8') as f:
        print(text, file=f)
def _ensure_status_row(db, cursor, ip, port):
    """Insert a port_monitor_status row for ip:port if none exists yet."""
    cursor.execute(
        "SELECT ip,`port` FROM port_monitor_status WHERE ip = %s AND `port` = %s",
        (ip, port))
    if not cursor.fetchall():
        cursor.execute(
            "INSERT INTO port_monitor_status(ip,port) VALUES (%s, %s)",
            (ip, port))
        db.commit()


def _alert_flag(cursor, ip, port):
    """Return the stored `send` flag for ip:port (1 = alert already sent)."""
    cursor.execute(
        "SELECT send FROM port_monitor_status WHERE ip = %s AND `port` = %s",
        (ip, port))
    return cursor.fetchone()[0]


def _contact(cursor, ip):
    """Return the (des, phone, robot) row configured for ip in port_monitor."""
    cursor.execute(
        "SELECT des,phone,robot FROM port_monitor WHERE ip = %s", (ip,))
    return cursor.fetchone()


def _handle_down(db, cursor, cur_time, ip, port, port_status):
    """Record a new outage and send the alert unless one was already sent."""
    if _alert_flag(cursor, ip, port) != 0:
        return
    cursor.execute(
        "UPDATE port_monitor_status SET send = 1 ,start_time = NOW() "
        "WHERE ip = %s AND port = %s",
        (ip, port))
    db.commit()
    description, phone, robot = _contact(cursor, ip)
    msg(f"{cur_time} {description} {ip}:{port} 端口关闭,请检查", phone, robot)
    out_log(f"{cur_time} {ip}:{port} check {port_status} send: 0")


def _handle_recovery(db, cursor, cur_time, ip, port, port_status):
    """Close an open outage and send the recovery notice, if one is open."""
    if _alert_flag(cursor, ip, port) != 1:
        return
    cursor.execute(
        "UPDATE port_monitor_status SET send = 0 ,end_time = NOW() "
        "WHERE ip = %s AND port = %s",
        (ip, port))
    db.commit()
    description, phone, robot = _contact(cursor, ip)
    cursor.execute(
        "SELECT start_time,end_time from port_monitor_status "
        "where ip = %s AND port = %s",
        (ip, port))
    start_time, end_time = cursor.fetchone()
    # Only the subtraction is guarded: a NULL start_time yields TypeError.
    # Failures inside msg() must propagate rather than trigger a second,
    # misleading "no fault time recorded" alert.
    try:
        duration = end_time - start_time
    except TypeError:
        text = f"{cur_time} {description} {ip}:{port} 端口恢复\n未记录故障时间"
    else:
        text = (
            f"{cur_time} {description} {ip}:{port} 端口恢复\n"
            f"开始时间 {start_time}\n结束时间 {end_time}\n持续时间 {duration}")
    msg(text, phone, robot)
    out_log(f"{cur_time} {ip}:{port} check {port_status} send:1")


def main():
    """Check every configured port once and send state-change alerts.

    Reads the ip/ports pairs from ``port_monitor``, probes each port in a
    thread pool, and for every result:

    * port closed and no alert sent yet: mark ``send = 1``, record
      ``start_time`` and send a "port closed" alert;
    * port open and an alert is outstanding: mark ``send = 0``, record
      ``end_time`` and send a "port recovered" notice with the duration.

    Each check is written to the log, followed by the number of ports checked.
    """
    # Keyword arguments only: PyMySQL 1.0+ rejects a positional host, and
    # `passwd` / `db` are deprecated aliases of `password` / `database`.
    db = pymysql.connect(
        host="127.0.0.1", user="root", password="root", database="zwy")
    try:
        cursor = db.cursor()
        # NOTE(review): the bare `where port_monitor.ip` condition is kept
        # verbatim; it looks like a leftover filter - confirm the intent.
        cursor.execute(
            "SELECT port_monitor.ip,port_monitor.ports FROM port_monitor where port_monitor.ip")
        results = cursor.fetchall()
        future_tasks = []
        count = 0
        # Thread count; adjust as needed
        with Pool(max_workers=10) as executor:
            for ip, ports in results:
                # `ports` holds a semicolon-separated list; skip empty items
                for port in ports.split(';'):
                    if port == '':
                        continue
                    future_tasks.append(executor.submit(port_check, ip, port))
            cur_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            for future in as_completed(future_tasks):
                # A status of 0 from connect_ex means the port is open
                port_status, ip, port = future.result()
                count += 1
                _ensure_status_row(db, cursor, ip, port)
                if port_status != 0:
                    _handle_down(db, cursor, cur_time, ip, port, port_status)
                else:
                    _handle_recovery(
                        db, cursor, cur_time, ip, port, port_status)
                out_log(f"{cur_time} {ip}:{port} check {port_status}")
    finally:
        # Release the connection even when a query or alert fails
        db.close()
    # Total number of ports checked in this run
    out_log(f"monitor ports number:{count}")
if __name__ == '__main__':
    # Run one monitoring pass and log how long it took, in seconds
    start_time = time.time()
    main()
    elapsed = time.time() - start_time
    out_log(f"running time:{elapsed}seconds")
port_monitor 建表sql
-- Schema and sample rows for `port_monitor`: one row per monitored host,
-- read by the monitoring script to decide what to check and whom to alert.
-- WARNING: this script drops any existing `port_monitor` table and its data.
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;
-- ----------------------------
-- Table structure for port_monitor
-- ----------------------------
DROP TABLE IF EXISTS `port_monitor`;
CREATE TABLE `port_monitor` (
-- ip: host address the script connects to (primary key, one row per host)
`ip` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
-- des: description included in the alert message text
`des` varchar(128) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
-- phone: mobile number the script puts in the alert's mention list
`phone` varchar(128) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
-- ports: semicolon-separated list of ports to check, e.g. '80;3306'
`ports` varchar(128) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
-- robot: webhook URL the script POSTs the alert to
`robot` varchar(128) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
PRIMARY KEY (`ip`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Records of port_monitor
-- ----------------------------
-- Sample rows; the webhook URLs appear to have their key removed
-- (presumably placeholders - replace with a real robot URL).
INSERT INTO `port_monitor` VALUES ('1.1.1.1', '测试系统103', '11110526422', '80;3306', 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?');
INSERT INTO `port_monitor` VALUES ('2.2.2.2', '测试系统104', '11110526424', '33256', 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=');
SET FOREIGN_KEY_CHECKS = 1;
port_monitor_status 建表sql
-- Schema and sample rows for `port_monitor_status`: per ip/port alert state
-- that the monitoring script uses to decide whether an alert or a recovery
-- notice still has to be sent.
-- WARNING: this script drops any existing `port_monitor_status` table and
-- its data.
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;
-- ----------------------------
-- Table structure for port_monitor_status
-- ----------------------------
DROP TABLE IF EXISTS `port_monitor_status`;
CREATE TABLE `port_monitor_status` (
-- ip / port: the monitored endpoint this state row belongs to.
-- There is no unique key on (ip, port); the script checks for an existing
-- row with a SELECT before inserting.
`ip` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
`port` int NULL DEFAULT NULL,
-- send: 0 = no alert outstanding (default), 1 = alert has been sent
`send` int(1) UNSIGNED ZEROFILL NULL DEFAULT 0,
-- start_time: set to NOW() by the script when it sends the "port closed" alert
`start_time` datetime(0) NULL DEFAULT NULL,
-- end_time: set to NOW() by the script when the port is seen open again
`end_time` datetime(0) NULL DEFAULT NULL,
-- id: surrogate auto-increment primary key
`id` int UNSIGNED NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 1106 CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = DYNAMIC;
-- ----------------------------
-- Records of port_monitor_status
-- ----------------------------
-- Sample rows: two endpoints with an alert outstanding (send = 1) and one
-- without (send = 0).
INSERT INTO `port_monitor_status` VALUES ('1.1.1.1', 3306, 1, '2021-03-23 17:14:43', NULL, 1148);
INSERT INTO `port_monitor_status` VALUES ('2.2.2.2', 33256, 0, NULL, NULL, 1149);
INSERT INTO `port_monitor_status` VALUES ('2.2.2.2', 80, 1, '2021-03-23 17:14:43', NULL, 1150);
SET FOREIGN_KEY_CHECKS = 1;