#!/usr/bin/python3
# encoding: utf-8
#filename: service-detection-repair.py
#author: gaohaixiang
#writetime:202403041043
"""
# 定时任务监测示例
*/5 * * * * python3 /data/processlog/service-detection-repair.py systemctlCheck nginx
*/5 * * * * python3 /data/processlog/service-detection-repair.py processCheck /data/nginx/sbin/nginx /data/nginx/sbin/nginx
# 脚本使用示例:
# systemctl is-active nginx ,检测不是 active 以后就 systemctl restart nginx
python3 service-detection-repair.py systemctlCheck nginx
# pgrep -f /data/nginx/sbin/nginx,该路径为nginx启动的绝对路径,用于检测这个nginx是否存活,
# 若是不存在该nginx,则使用 /data/nginx/sbin/nginx 启动 nginx
# 第一个路径为检测nginx是否存活,第二个路径是绝对路径启动nginx
python3 service-detection-repair.py processCheck /data/nginx/sbin/nginx /data/nginx/sbin/nginx
"""
import time
import subprocess
import sys
import os
# 日期时间获取
def timestamp_time():
    """Return the current local time as a compact ``YYYYMMDDHHMMSS`` string.

    Returns:
        A 14-character string of digits, e.g. ``20240304104300``.
    """
    # Truncate to whole seconds before converting to a local struct_time.
    epoch_seconds = int(time.time())
    return time.strftime("%Y%m%d%H%M%S", time.localtime(epoch_seconds))
# 日志文件写入换行间隔
def fileWriteLine(getdatetime, filewrite):
    """Write an entry separator line followed by the timestamp to the log.

    Args:
        getdatetime: Timestamp string recorded on the line after the separator.
        filewrite: Writable text file object providing ``writelines``.
    """
    separator = "\n-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n"
    filewrite.writelines([separator, getdatetime + "\n"])
# systemctl 进行服务检测
def systemctlCheckService(getdatetime, filewrite, checkCommand):
    """Run a status command and report whether the service is active.

    Args:
        getdatetime: Timestamp string written into the log entry header.
        filewrite: Open, writable log file object.
        checkCommand: Shell command line whose stripped stdout is compared
            with ``active`` (e.g. ``systemctl is-active nginx``).

    Returns:
        True only when the command exits with status 0 and prints exactly
        ``active``; False in every other case.
    """
    fileWriteLine(getdatetime, filewrite)
    filewrite.writelines("systemctlCheckService\n")
    filewrite.writelines(checkCommand + "\n")
    try:
        # checkCommand is a complete shell command line, hence shell=True.
        raw_output = subprocess.check_output(checkCommand, shell=True)
    except subprocess.CalledProcessError as e:
        # A non-zero exit status means the service is not active.
        print(f"错误,服务检测的状态是: {e}")
        filewrite.writelines("------服务关闭------\n")
        return False

    status = raw_output.decode('utf-8').strip()
    is_active = status == 'active'
    # Log the verdict that is actually returned: a zero exit status with any
    # output other than "active" must not be recorded as a healthy service.
    if is_active:
        filewrite.writelines("------服务正常------\n")
    else:
        print(f"错误,服务检测的状态是: {status}")
        filewrite.writelines("------服务关闭------\n")
    return is_active
# systemctl 进行服务重启
def systemctlRestartService(getdatetime, filewrite, restartCommand, serviceName):
    """Run the restart command and record whether it succeeded.

    Args:
        getdatetime: Timestamp string written into the log entry header.
        filewrite: Open, writable log file object.
        restartCommand: Shell command line that restarts the service.
        serviceName: Service name, used only in the printed success message.
    """
    fileWriteLine(getdatetime, filewrite)
    filewrite.writelines("systemctlRestartService\n")
    filewrite.writelines(restartCommand + "\n")
    try:
        # check_call raises CalledProcessError on a non-zero exit status.
        subprocess.check_call(restartCommand, shell=True)
    except subprocess.CalledProcessError as err:
        filewrite.writelines("------服务重启错误------\n")
        print(f"服务重启错误: {err}")
    else:
        filewrite.writelines("------服务已经被重启------\n")
        print(f"服务 {serviceName} 已经被重启.")
# Check a systemctl-managed service and restart it when it is not active
def systemctlCheck(getdatetime, filewrite, serviceName):
    """Check a systemd service and restart it when it is not active.

    Args:
        getdatetime: Timestamp string written into the log entry header.
        filewrite: Open, writable log file object.
        serviceName: Name of the systemd unit to check (e.g. ``nginx``).
    """
    # Function-scope import so this block does not depend on a file-level one.
    import shlex

    fileWriteLine(getdatetime, filewrite)
    filewrite.writelines("systemctlCheck\n")
    filewrite.writelines(serviceName + "\n")

    # Both commands are executed through a shell, so the unit name must be
    # quoted: otherwise names containing shell metacharacters (for example
    # systemd-escaped names with backslashes) are mangled or interpreted.
    # shlex.quote returns plain names such as "nginx" unchanged.
    quoted_name = shlex.quote(serviceName)
    # Command that reports the service state.
    checkCommand = f'systemctl is-active {quoted_name}'
    # Command that restarts the service.
    restartCommand = f'sudo systemctl restart {quoted_name}'

    if systemctlCheckService(getdatetime, filewrite, checkCommand):
        filewrite.writelines("------服务正在运行中.------\n")
        print(f"服务 {serviceName} 正在运行中.")
        return

    filewrite.writelines("------服务停止. 准备进行重启.------\n")
    print(f"服务 {serviceName} 停止. 准备进行重启.")
    systemctlRestartService(getdatetime, filewrite, restartCommand, serviceName)
def processIsRunning(getdatetime, filewrite, servicePath):
    """Report whether a process whose command line matches servicePath exists.

    The lookup uses ``pgrep -f``, which matches the pattern against the full
    command line of every process.

    Args:
        getdatetime: Timestamp string written into the log entry header.
        filewrite: Open, writable log file object.
        servicePath: Pattern passed to ``pgrep -f``.

    Returns:
        True when at least one matching process other than this script and
        its parent is found; False otherwise, including when ``pgrep`` exits
        with a non-zero status.
    """
    fileWriteLine(getdatetime, filewrite)
    filewrite.writelines("processIsRunning\n")
    filewrite.writelines(servicePath + "\n")

    # This script receives servicePath as an argument, so its own command
    # line matches the pattern. The same holds for a parent wrapper such as
    # "sudo ..." or "sh -c ...", so both PIDs are ignored to avoid a false
    # "running" verdict.
    ignored_pids = {str(os.getpid()), str(os.getppid())}
    try:
        # "--" ends option parsing so a pattern starting with "-" is safe.
        output = subprocess.check_output(
            ['pgrep', '-f', '-d', '\n', '--', servicePath]
        )
    except subprocess.CalledProcessError:
        # pgrep exits with a non-zero status when nothing matches.
        pids = []
    else:
        # split() without arguments drops empty fields, so empty output
        # yields an empty list instead of [''].
        pids = [
            pid for pid in output.decode('utf-8').split()
            if pid not in ignored_pids
        ]

    if pids:
        filewrite.writelines("------服务正在运行.------\n")
        print(f"{servicePath} 正在运行")
        return True

    filewrite.writelines("------服务已经停止.------\n")
    print(f"{servicePath} 已经停止")
    return False
def processStartService(getdatetime, filewrite, startCommand):
    """Start a service by running its start command through the shell.

    Args:
        getdatetime: Timestamp string written into the log entry header.
        filewrite: Open, writable log file object.
        startCommand: Shell command line that starts the service.
    """
    fileWriteLine(getdatetime, filewrite)
    filewrite.writelines("processStartService\n")
    filewrite.writelines(startCommand + "\n")
    try:
        # Standard output is discarded rather than captured: capturing it
        # through a pipe makes the call wait for end-of-file, which never
        # arrives while the started service keeps its stdout open, and that
        # would block this script. check_call only waits for the launching
        # command itself to exit.
        subprocess.check_call(
            startCommand, shell=True, stdout=subprocess.DEVNULL
        )
    except subprocess.CalledProcessError as e:
        filewrite.writelines("------使用命令进行启动发生错误.------\n")
        print(f"错误,使用这个命令进行启动发生错误 {startCommand}: {e}")
    else:
        filewrite.writelines("------已经开始使用这个命令进行启动.------\n")
        print(f"已经开始使用这个命令进行启动: {startCommand}")
def processCheck(getdatetime, filewrite, serviceMaster):
    """Check every configured process and start those that are not running.

    Args:
        getdatetime: Timestamp string written into the log entry header.
        filewrite: Open, writable log file object.
        serviceMaster: Mapping of service name to a dict with the keys
            ``servicePath`` (pattern used to detect the process) and
            ``startCommand`` (shell command used to start it).
    """
    fileWriteLine(getdatetime, filewrite)
    filewrite.writelines("processCheck\n")
    for serviceName in serviceMaster:
        entry = serviceMaster[serviceName]
        servicePath = entry['servicePath']
        startCommand = entry['startCommand']

        if processIsRunning(getdatetime, filewrite, servicePath):
            filewrite.writelines("------服务正在运行.------\n")
            print(f"{serviceName} 正在运行。")
            continue

        # Not running: log the fact and try to start it.
        filewrite.writelines("------服务已关闭。正在尝试启动.------\n")
        print(f"{serviceName} 已关闭。正在尝试启动。")
        processStartService(getdatetime, filewrite, startCommand)
def main(getdatetime, filewrite):
    """Dispatch to the check selected by the command-line arguments.

    Supported invocations:
        ``systemctlCheck <service>``
        ``processCheck <servicePath> <startCommand>``

    Any other argument combination is logged and printed as an error.

    Args:
        getdatetime: Timestamp string written into the log entry header.
        filewrite: Open, writable log file object.
    """
    fileWriteLine(getdatetime, filewrite)
    filewrite.writelines("main\n")

    arguments = sys.argv[1:]
    if not arguments:
        filewrite.writelines("------脚本没有携带参数,请携带正确的参数再运行脚本.------\n")
        print("脚本没有携带参数,请携带正确的参数再运行脚本")
        return

    mode = arguments[0]
    if mode == "systemctlCheck" and len(arguments) == 2:
        systemctlCheck(getdatetime, filewrite, arguments[1])
    elif mode == "processCheck" and len(arguments) == 3:
        serviceMaster = {
            'serviceMaster': {
                'servicePath': arguments[1],
                'startCommand': arguments[2],
            },
        }
        processCheck(getdatetime, filewrite, serviceMaster)
    else:
        filewrite.writelines("------脚本携带参数有误.------\n")
        print("脚本携带参数有误")
if __name__ == '__main__':
    # Directory where the script and its log are stored.
    logdir = "/data/processlog/"
    # Log file name.
    processChecklog = "processChecklog.log"
    processfile = logdir + processChecklog
    # Timestamp shared by every log entry written during this run.
    getdatetime = timestamp_time()
    # The context manager closes the log file even when main() raises, so
    # buffered entries are not lost and the handle is not leaked.
    with open(processfile, "a+", encoding="UTF8") as filewrite:
        # Script entry point.
        main(getdatetime, filewrite)