Flask监控程序:Python脚本监控Flask,异常重启发送邮件

摘要:Flask、gunicorn

利用gunicorn部署Flask应用

编写shell脚本利用gunicorn部署Flask

# vim run.sh
#! /bin/bash
# Start the Flask app under gunicorn as a daemon (-D), configured by gun.conf.py.
# Abort if the project directory cannot be entered, so gunicorn is never
# launched from the wrong working directory.
cd /home/gp/myproject/my_web || exit 1
gunicorn -c gun.conf.py -D manage:app

其中gun.conf.py将pid文件指定为项目目录下的gunicorn.pid

# cat gunicorn.pid
9306

监控脚本

从gunicorn.pid获取进程号,使用psutil模块每隔几秒检查该进程是否存在;如果不存在,则最多尝试重启3次(在Python中调用shell命令执行run.sh),并且发送邮件告知程序异常以及是否重启成功。其中发送邮件采用Python自带的模块smtplib和email

import os
import time
import datetime
import smtplib
import traceback
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.header import Header

import config
from utils import get_host_ip, get_pid_from_pid_file, check_pid, ERROR_HTML, RESTART_SUCCESS_HTML, RESTART_FAIL_HTML

# Module-level settings, resolved once at import time.
# Address of this host as reported by get_host_ip(); used in mail subjects/bodies.
IP = get_host_ip()
# Every value below is read through config.get_string(), i.e. as a string.
SERVER_NAME = config.get_string("server.name")
PID_FILE_PATH = config.get_string("pid.file.path")
SMTP_HOST = config.get_string("smtp.host")
# NOTE(review): get_string() returns its default (None) for a missing key, so
# the int() conversions here would raise TypeError in that case.
SMTP_PORT = int(config.get_string("smtp.port"))
FROM_EMAIL_ACCOUNT = config.get_string("from.email.account")
FROM_EMAIL_PASSWORD = config.get_string("from.email.password")
# May hold several recipients separated by "," (it is split on "," when sending).
TO_EMAIL_ACCOUNT = config.get_string("to.email.account")
# Upper bound on restart attempts per detected failure.
RETRY_COUNT = int(config.get_string("retry.count"))
# Seconds to sleep between two liveness checks in the main loop.
SLEEP_TIME = int(config.get_string("sleep.time"))


def send_error_email():
    """Send the "application process died" alert e-mail.

    The HTML body is ERROR_HTML filled with IP, SERVER_NAME and the current
    local time. The message is sent over SMTP-over-SSL from FROM_EMAIL_ACCOUNT
    to every comma-separated address in TO_EMAIL_ACCOUNT.

    Delivery is best effort: any failure is printed with its traceback and
    swallowed, so the caller is never interrupted by a mail problem.
    """
    try:
        msg = MIMEMultipart()
        msg['Subject'] = Header("{}:{}应用进程死亡了".format(IP, SERVER_NAME), 'utf-8').encode()
        msg['From'] = FROM_EMAIL_ACCOUNT
        msg['To'] = TO_EMAIL_ACCOUNT
        timestamp = datetime.datetime.today().strftime("%Y-%m-%d %H:%M:%S")
        msg.attach(MIMEText(ERROR_HTML.format(IP, SERVER_NAME, timestamp), 'html', 'utf-8'))
        # Use the connection as a context manager instead of calling quit() in
        # a finally clause: quit() raises SMTPServerDisconnected when the
        # server has already dropped the connection, and that exception would
        # escape this function. Any error raised while leaving the with-block
        # is still caught by the handler below.
        with smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT) as conn:
            conn.login(FROM_EMAIL_ACCOUNT, FROM_EMAIL_PASSWORD)
            conn.sendmail(FROM_EMAIL_ACCOUNT, TO_EMAIL_ACCOUNT.split(","), msg.as_string())
    except Exception:
        # Deliberate catch-all: alerting must never take the monitor down.
        traceback.print_exc()


def send_restart_email(success=True):
    """Send the e-mail reporting the outcome of a restart attempt.

    Args:
        success: True to report a successful restart (RESTART_SUCCESS_HTML),
            False to report a failed one (RESTART_FAIL_HTML).

    The message is sent over SMTP-over-SSL from FROM_EMAIL_ACCOUNT to every
    comma-separated address in TO_EMAIL_ACCOUNT. Delivery is best effort: any
    failure while connecting or sending is printed with its traceback and
    swallowed.
    """
    timestamp = datetime.datetime.today().strftime("%Y-%m-%d %H:%M:%S")
    if success:
        subject = Header("{}:{}应用已经重启".format(IP, SERVER_NAME), 'utf-8').encode()
        text = MIMEText(RESTART_SUCCESS_HTML.format(IP, SERVER_NAME, timestamp), 'html', 'utf-8')
    else:
        subject = Header("{}:{}应用重启失败".format(IP, SERVER_NAME), 'utf-8').encode()
        text = MIMEText(RESTART_FAIL_HTML.format(IP, SERVER_NAME, timestamp), 'html', 'utf-8')
    try:
        msg = MIMEMultipart()
        msg['Subject'] = subject
        msg['From'] = FROM_EMAIL_ACCOUNT
        msg['To'] = TO_EMAIL_ACCOUNT
        msg.attach(text)
        # Use the connection as a context manager instead of calling quit() in
        # a finally clause: quit() raises SMTPServerDisconnected when the
        # server has already dropped the connection, and that exception would
        # escape this function. Any error raised while leaving the with-block
        # is still caught by the handler below.
        with smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT) as conn:
            conn.login(FROM_EMAIL_ACCOUNT, FROM_EMAIL_PASSWORD)
            conn.sendmail(FROM_EMAIL_ACCOUNT, TO_EMAIL_ACCOUNT.split(","), msg.as_string())
    except Exception:
        # Deliberate catch-all: alerting must never take the monitor down.
        traceback.print_exc()


def restart_server():
    """Run the configured start command and report whether the server came up.

    Executes the shell command stored under "start.gunicorn.cmd", waits three
    seconds, then re-reads the pid file and checks that the recorded process
    exists. A restart e-mail (success or failure) is sent in either case.

    Returns:
        "success" if the process recorded in the pid file exists, else "fail".
    """
    os.system(config.get_string("start.gunicorn.cmd"))
    # Give the freshly started server a moment to write its pid file.
    time.sleep(3)
    try:
        pid_num = get_pid_from_pid_file(PID_FILE_PATH)
    except (OSError, ValueError):
        # The pid file is missing, unreadable or does not hold an integer
        # (e.g. the start command failed before writing it). Count that as a
        # failed restart instead of letting the exception abort the monitor.
        traceback.print_exc()
        alive = False
    else:
        alive = check_pid(pid_num)

    if alive:
        send_restart_email(success=True)
        return "success"
    send_restart_email(success=False)
    return "fail"


if __name__ == '__main__':
    # Monitoring loop: every SLEEP_TIME seconds, check that the process named
    # in the pid file exists; if not, send an alert and try to restart it up to
    # RETRY_COUNT times.
    #
    # `dead` becomes True once every restart attempt has failed. While it is
    # True no further alerts or restarts are issued; it is reset as soon as the
    # process is seen running again.
    dead = None
    while True:
        try:
            pid_num = get_pid_from_pid_file(PID_FILE_PATH)
        except (OSError, ValueError):
            # A missing, unreadable or malformed pid file means there is no
            # process to find. Treat it as "not running" rather than letting
            # the exception terminate the monitor itself.
            result = False
        else:
            result = check_pid(pid_num)

        if result:
            dead = False
        elif not dead:
            send_error_email()
            status = None
            for _ in range(RETRY_COUNT):
                print("-----------------重启:{}".format(datetime.datetime.today().strftime("%Y-%m-%d %H:%M:%S")))
                status = restart_server()
                if status == "success":
                    break
            if status == "fail":
                dead = True
        time.sleep(SLEEP_TIME)

utils.py编写辅助功能,包括获取IP地址、获取pid、检查pid等。

import traceback
import socket

import psutil


def get_host_ip():
    """Return the local IP address of this host, or None on failure.

    Opens a UDP socket, connects it to 8.8.8.8:80 and returns the local
    address the socket ended up with. Any error is reported on stdout together
    with its traceback and results in None.
    """
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as probe:
            probe.connect(('8.8.8.8', 80))
            return probe.getsockname()[0]
    except Exception:
        print("获取ip地址错误")
        traceback.print_exc()
    return None


def get_pid_from_pid_file(pid_text_path):
    """Read a pid file and return the process id it contains as an int.

    Surrounding whitespace in the file is ignored. Errors from opening the
    file or from converting its content to an integer propagate to the caller.
    """
    with open(pid_text_path) as pid_file:
        contents = pid_file.read()
    return int(contents.strip())


def check_pid(pid_num):
    """Return True if psutil reports a process with id *pid_num*, else False."""
    return True if psutil.pid_exists(pid_num) else False


# Mail body templates. Each one takes three positional str.format() arguments,
# in this order: endpoint (host IP), server name, timestamp.
# NOTE(review): the names say HTML but no markup is visible in these strings;
# presumably the tags were lost when the article was extracted. Confirm against
# the original source before relying on the rendering.

# Body of the alert sent when the monitored process is found dead.
ERROR_HTML = """

[WARNING]监控报警

Endpoint:{}

ServerName:{}

Note:应用进程死亡了

Timestamp:{}

"""

# Body of the notice sent after a successful restart.
RESTART_SUCCESS_HTML = """

[WARNING]监控报警

Endpoint:{}

ServerName:{}

Note:应用已经重启

Timestamp:{}

"""

# Body of the notice sent when a restart attempt failed.
RESTART_FAIL_HTML = """

[WARNING]监控报警

Endpoint:{}

ServerName:{}

Note:应用重启失败

Timestamp:{}

"""

config.py读取config.yml中的配置

import yaml

# Location of the YAML settings file. The path is relative, so it is resolved
# against the current working directory of the process.
YAML_FILE = "./etc/config.yml"


def load_yaml_config(yaml_file):
    """Parse the YAML file at *yaml_file* and return the loaded object.

    Args:
        yaml_file: Path of the YAML file to read.

    Returns:
        Whatever the YAML document deserializes to.

    Errors from opening or parsing the file propagate to the caller.
    """
    # Decode explicitly as UTF-8 rather than with the locale's default
    # encoding, so non-ASCII values load the same way on every host.
    with open(yaml_file, encoding="utf-8") as f:
        # NOTE(review): FullLoader can construct more than plain scalars and
        # mappings. That is acceptable for a trusted local config file; switch
        # to yaml.safe_load if this file can ever come from an untrusted source.
        loaded = yaml.load(f, Loader=yaml.FullLoader)
    return loaded


# Parsed once when this module is imported; get_string() looks keys up in it.
conf = load_yaml_config(YAML_FILE)


def get_string(key: str, default: str = None):
    """Return the configured value for *key* converted with str().

    Args:
        key: Top-level key to look up in the loaded configuration.
        default: Value returned unchanged when *key* is not present.

    Returns:
        str(value) when the key exists (so a key whose value is None yields
        the string "None"), otherwise *default*.
    """
    if key not in conf.keys():
        return default
    return str(conf[key])

监控脚本测试

后台杀死gunicorn进程,静静地等待邮件和重启。


监控程序发送邮件.png

你可能感兴趣的:(Flask监控程序:Python脚本监控Flask,异常重启发送邮件)