python监控服务是否正常

应用服务部署到正式服务器(本例以部署到 Linux 为例)后,需要在应用出现异常时及时通知维护人员登录服务器进行处理。本例用 Python 编写了监控程序,以达到监控应用服务状态、了解各应用运行情况的目的。

监控代码:

# -*- coding:utf-8 -*-

import math
import os
import sys
import urllib.parse
import urllib.request
import xml.sax

import mysql.connector
import redis
import threading
import configparser


import smtplib
from email.mime.text import MIMEText
from email.utils import formataddr

import datetime

from xml_parse import XMLHandler


# Section names looked up in the INI file, and the INI file name itself.
CONFIG_MYSQL_SEL = 'mysql'
CONFIG_SMS_SEL = 'sms'
CONFIG_REDIS_SEL = 'redis'
CONFIG_FILE = 'sync_config.ini'

# Connection settings; every value starts empty and is filled by init_config().
mysql_config = dict.fromkeys(('host', 'username', 'password', 'db'), '')
sms_config = dict.fromkeys(
    ('userId', 'password', 'pszMobis', 'iMobiCount', 'pszSubPort', 'reqUrl'),
    '',
)
redis_config = dict.fromkeys(('host', 'port', 'password'), '')

# Outcome of each check: '' until the check has run, then True or False.
monitor_result = dict.fromkeys(
    ('jetty', 'redis', 'mis', 'mysql', 'disk', 'token'),
    '',
)
# Label used in the alert SMS for each key of monitor_result.
sms_content_tip = {
    'jetty': 'jetty',
    'redis': 'redis',
    'mis': 'mis',
    'mysql': '数据库',
    'disk': '磁盘空间',
    'token': 'token请求',
}

class monitorServer:
    """Health checks for the deployed services, plus SMS / e-mail alerting.

    Every ``check*`` method stores its verdict in the module-level
    ``monitor_result`` dict (``True`` = healthy, ``False`` = faulty or the
    check itself raised) and returns that same boolean.  Connection settings
    come from the module-level ``sms_config``, ``mysql_config`` and
    ``redis_config`` dicts.
    """

    # A counter value at or above its limit marks the corresponding check as failed.
    MIS_ERROR_LIMIT = 10
    TOKEN_ERROR_LIMIT = 3

    def sendSMS(self, msg):
        """Send ``msg`` through the HTTP SMS gateway configured in ``sms_config``.

        The reply is parsed as XML and the text of its ``string`` element is
        taken as the result; a result longer than 10 and shorter than 25
        characters is treated as success.

        Args:
            msg: Text of the SMS.

        Returns:
            True when the gateway result looks successful; False on a failure
            reply, a reply without a ``string`` element, or any error while
            sending or parsing.
        """
        print("短信内容:" + msg)
        sms_data = {
            'userId': sms_config['userId'],
            'password': sms_config['password'],
            'pszMobis': sms_config['pszMobis'],
            'pszMsg': msg,
            'iMobiCount': sms_config['iMobiCount'],
            'pszSubPort': sms_config['pszSubPort'],
        }
        sms_data_urlencode = urllib.parse.urlencode(sms_data).encode('utf-8')

        reqUrl = sms_config['reqUrl']
        headerdata = {"Content-Type": "application/x-www-form-urlencoded"}

        try:
            req = urllib.request.Request(reqUrl, sms_data_urlencode, headerdata)
            # The context manager closes the HTTP response even if reading fails.
            with urllib.request.urlopen(req) as response:
                responseStr = response.read().decode('UTF8')
            print(responseStr)

            xh = XMLHandler()
            xml.sax.parseString(responseStr, xh)
            ret = xh.getDict().get("string")
            if ret is None:
                # The reply carried no <string> element, so there is no result to inspect.
                print("发送短信失败")
                return False

            print("sms send result:" + ret)
            if 10 < len(ret) < 25:
                print("发送短信成功")
                return True

            print("发送短信失败")
            return False
        except Exception:
            print("发送短信异常!" + str(sys.exc_info()))
            return False

    def SendEmail(self, fromAdd, toAdd, mailContent):
        """Send a plain-text UTF-8 e-mail via ``smtp.qq.com`` over SSL (port 465).

        Args:
            fromAdd: Sender address; also used as the SMTP login name.
            toAdd: Recipient address.
            mailContent: Body of the message.

        Returns:
            True when the message was handed to the server, False on any error.
        """
        try:
            msg = MIMEText(mailContent, 'plain', 'utf-8')
            # formataddr takes a (display name, address) pair.
            msg['From'] = formataddr(("发件人昵称", fromAdd))
            msg['To'] = formataddr(("收件人昵称", toAdd))
            msg['Subject'] = "邮件主题-测试"

            # The context manager sends QUIT and closes the socket on every
            # exit path, so a failed login or send no longer leaks the connection.
            with smtplib.SMTP_SSL("smtp.qq.com", 465) as server:
                # NOTE(review): the SMTP password is a hard-coded placeholder;
                # it should be supplied from configuration.
                server.login(fromAdd, '***********')
                server.sendmail(fromAdd, [toAdd], msg.as_string())

            return True
        except Exception:
            print("发送邮件异常!" + str(sys.exc_info()))
            return False

    def _findInProcessList(self, command, marker):
        """Run shell ``command`` and return the index of ``marker`` in its output (-1 if absent)."""
        # Closing the pipe releases the file descriptor of the child shell.
        with os.popen(command) as pipe:
            output = pipe.read()
        return output.find(marker)

    def checkJettyStatus(self):
        """Check that a jetty process started from ``/data/jetty-9.2.25`` is listed by ``ps``.

        Returns:
            True when the process is found; False when it is missing or the
            check raised.  The verdict is also stored in ``monitor_result['jetty']``.
        """
        try:
            p = self._findInProcessList("ps -ef|grep jetty|grep -v grep",
                                        '-Djetty.home=/data/jetty-9.2.25')
            print("jetty check result:" + str(p))

            running = p != -1
            if running:
                print('jetty server is running')
            else:
                print('jetty server is not run')
        except Exception:
            print('check jetty exception!' + str(sys.exc_info()))
            running = False

        monitor_result['jetty'] = running
        return running

    def checkRedisServer(self):
        """Check that ``./redis-server 127.0.0.1:6379`` is listed by ``ps``.

        Returns:
            True when the process is found; False when it is missing or the
            check raised.  The verdict is also stored in ``monitor_result['redis']``.
        """
        try:
            p = self._findInProcessList(" ps -ef|grep redis|grep -v grep",
                                        './redis-server 127.0.0.1:6379')
            print("redis check result:" + str(p))

            running = p != -1
            if running:
                print('redis is running')
            else:
                print('redis is not run')
        except Exception:
            print('check redis exception!' + str(sys.exc_info()))
            running = False

        monitor_result['redis'] = running
        return running

    def checkMysqlServer(self):
        """Check that a MySQL connection can be opened with ``mysql_config``.

        Returns:
            True when connecting and closing succeeded, False otherwise.  The
            verdict is also stored in ``monitor_result['mysql']``.
        """
        try:
            db = mysql.connector.connect(user=mysql_config['username'],
                                         password=mysql_config['password'],
                                         host=mysql_config['host'],
                                         database=mysql_config['db'])
            print('db connect success')
            db.close()
            healthy = True
        except Exception:
            print('db connect exception!' + str(sys.exc_info()))
            healthy = False

        monitor_result['mysql'] = healthy
        return healthy

    def _takeErrorCount(self, counterKey):
        """Return the value stored under ``counterKey`` in Redis and reset it to 0.

        GETSET performs the read and the reset as one atomic command, so
        errors counted between a separate GET and SET are no longer lost.
        A missing key yields ``"0"``.  The connection pool is disconnected on
        every exit path.
        """
        pool = redis.ConnectionPool(host=redis_config['host'],
                                    port=redis_config['port'],
                                    password=redis_config['password'])
        try:
            r = redis.Redis(connection_pool=pool)
            ret = r.getset(counterKey, 0)
        finally:
            pool.disconnect()

        if ret is None:
            ret = "0"
        return ret

    def checkMisServer(self):
        """Check the request-error counter kept in Redis under ``misReqErrorCnt``.

        The counter is read and reset to 0 on every run; 10 or more errors
        since the previous run count as a failure.

        Returns:
            True when fewer than 10 errors were counted; False otherwise or
            when the check raised.  The verdict is also stored in
            ``monitor_result['mis']``.
        """
        try:
            ret = self._takeErrorCount('misReqErrorCnt')
            print("mis request error times:" + str(ret))

            count = int(ret)
            print("请求异常次数:", count)
            healthy = count < self.MIS_ERROR_LIMIT
        except Exception:
            print("check mis exception!" + str(sys.exc_info()))
            healthy = False

        monitor_result['mis'] = healthy
        return healthy

    def checkMisTokenError(self):
        """Check the token-error counter kept in Redis under ``misTokenErrorCnt``.

        The counter is read and reset to 0 on every run; 3 or more errors
        since the previous run count as a failure.

        Returns:
            True when fewer than 3 errors were counted; False otherwise or
            when the check raised.  The verdict is also stored in
            ``monitor_result['token']``.
        """
        try:
            ret = self._takeErrorCount('misTokenErrorCnt')
            print("mis token error times:" + str(ret))

            count = int(ret)
            print("token请求异常次数:", count)
            healthy = count < self.TOKEN_ERROR_LIMIT
        except Exception:
            print("check mis token exception!" + str(sys.exc_info()))
            healthy = False

        monitor_result['token'] = healthy
        return healthy

    def checkDiskInfo(self, path='/data', maxUsagePercent=90):
        """Check the usage of the filesystem that holds ``path``.

        Usage is ``(total - free) / total`` computed from ``os.statvfs``
        (``f_blocks`` and ``f_bfree``), rounded up to a whole percent.

        Args:
            path: Any path on the filesystem to inspect.
            maxUsagePercent: Usage, in percent, at or above which the check fails.

        Returns:
            True while usage is below ``maxUsagePercent``; False otherwise or
            when the check raised (for example when ``path`` does not exist).
            The verdict is also stored in ``monitor_result['disk']``.
        """
        try:
            statvfs = os.statvfs(path)
            print(statvfs)
            total = statvfs.f_frsize * statvfs.f_blocks
            print("disk total:" + str(total))
            free = statvfs.f_frsize * statvfs.f_bfree
            print("disk free:" + str(free))

            disk_usage = math.ceil((total - free) * 100.0 / total)
            print("硬盘空间使用率(最大100%):" + str(disk_usage) + "%")

            # Alert once usage reaches the threshold (90% by default).
            healthy = disk_usage < maxUsagePercent
        except Exception:
            print("check disk exception!" + str(sys.exc_info()))
            healthy = False

        monitor_result['disk'] = healthy
        return healthy
"""
   初始化配置文件
"""
def init_config():
    """Load the mysql, sms and redis sections of ``CONFIG_FILE``.

    Every key already present in the module-level ``mysql_config``,
    ``sms_config`` and ``redis_config`` dicts is overwritten with the value
    read from the matching INI section, and each filled dict is printed.

    Returns:
        True when every setting was read; False when the file could not be
        read or a section/option is missing (the problem is printed).
    """
    try:
        cfg = configparser.ConfigParser()
        # read() silently skips files it cannot open and returns the list of
        # files it actually parsed, so an empty result means "no config".
        if not cfg.read(CONFIG_FILE, encoding="utf-8-sig"):
            print("config file not found or unreadable: " + CONFIG_FILE)
            return False

        targets = (
            (CONFIG_MYSQL_SEL, mysql_config),
            (CONFIG_SMS_SEL, sms_config),
            (CONFIG_REDIS_SEL, redis_config),
        )
        for section, settings in targets:
            for key in settings:
                settings[key] = cfg.get(section, key)
            # NOTE(review): this prints passwords in clear text; consider
            # masking them if the output goes to a shared log.
            print(settings)

        return True
    except Exception:
        print(str(sys.exc_info()))
        return False

"""
重命名日志文件。如果日志大于10M,则重命名文件
"""
def renameLogFile(path):
    """Rotate ``log_monitor_server.txt`` inside ``path`` once it reaches 10 MB.

    The log is renamed to ``log_monitor_server.txt.<n>``, where ``<n>`` is one
    more than the highest numeric suffix among the existing backups of this
    log.  Other files in the directory are ignored, even if their extension
    happens to be numeric.

    Args:
        path: Directory that contains the log file.

    Returns:
        None.  Errors (missing file or directory, failed rename) are printed
        and otherwise ignored, so a rotation problem never aborts the caller.
    """
    logName = "log_monitor_server.txt"
    sizeLimit = 10 * 1024 * 1024  # 10 MB, in bytes

    try:
        logPath = os.path.join(path, logName)
        if os.path.getsize(logPath) < sizeLimit:
            return

        # Find the highest suffix already used by backups of this log.
        highest = 0
        for entry in os.listdir(path):
            stem, ext = os.path.splitext(entry)
            suffix = ext[1:]
            if stem == logName and suffix.isdecimal():
                highest = max(highest, int(suffix))

        os.rename(logPath, os.path.join(path, logName + "." + str(highest + 1)))
        print("日志已达10M,进行重命名")
    except Exception as err:
        # Best effort: report the problem and carry on.
        print(err)

def main():
    """Run every health check once and send one SMS naming the failed ones.

    Steps: load the configuration (stop if that fails), run the six checks of
    ``monitorServer`` in parallel threads, send an alert SMS when any entry
    of ``monitor_result`` is not ``True``, then rotate the log file under
    ``/data/watchDog/logs``.

    Returns:
        None.  Any exception is printed instead of propagated.
    """
    def now():
        # Timestamp used in the progress lines.
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    try:
        print("begin to check...", now())

        ms = monitorServer()

        # Load the configuration; nothing can be checked without it.
        readConfigRet = init_config()
        print(readConfigRet)
        if not readConfigRet:
            return
        print("读取配置文件成功")

        print("begin to exec check...", now())

        checks = (
            ms.checkJettyStatus,
            ms.checkRedisServer,
            ms.checkMisServer,
            ms.checkMysqlServer,
            ms.checkDiskInfo,
            ms.checkMisTokenError,
        )
        threads = [threading.Thread(target=check) for check in checks]
        for worker in threads:
            worker.start()
        for worker in threads:
            worker.join()

        print("end to exec check!", now())
        print(monitor_result)
        print("begin to check if send sms!", now())

        # Anything other than True is reported: a check whose thread died
        # before recording a verdict leaves '' behind and must not pass as healthy.
        failed = [sms_content_tip[key] for key in monitor_result
                  if monitor_result[key] is not True]

        if failed:
            smsRet = ms.sendSMS("检测到 " + "、".join(failed) + " 异常,请尽快登陆服务器查看详情并恢复环境。")
            if not smsRet:
                print("检测到服务器异常,但短信发送失败!")
        else:
            print("all is normal.", now())

        print("end to check if send sms!", now())
        print("end to check...", now())

        # Rotate the log file if it has grown too large.
        renameLogFile("/data/watchDog/logs")
    except Exception:
        print(str(sys.exc_info()))


if __name__ == "__main__":
    # Banner with the start time, so individual runs are easy to spot in the log.
    print("=============================")
    print("***  " + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + "  ***")
    print("=============================")
    sys.exit(int(main() or 0))

本例监控到异常后会发送短信通知用户,短信发送结果以 XML 格式返回。解析该 XML 的代码(即上文导入的 xml_parse 模块)如下:

# -*- coding:utf-8 -*-

import xml.sax.handler 



class XMLHandler(xml.sax.handler.ContentHandler):
    """SAX content handler that collects element text into a ``{name: text}`` dict.

    The text buffer is cleared whenever an element starts and stored under
    the element's name when it ends.  The buffer is not cleared on an end
    tag, so for nested elements the parent's entry holds the text seen since
    the most recent start tag.  A repeated element name overwrites the
    earlier entry.
    """

    def __init__(self):
        # Initialise the base handler as well, so the state it maintains
        # (the document locator) exists before parsing starts.
        super().__init__()
        self.buffer = ""     # text collected since the last start tag
        self.mapping = {}    # element name -> collected text

    def startElement(self, name, attributes):
        """Begin collecting text for a newly opened element."""
        self.buffer = ""

    def characters(self, data):
        """Append a chunk of character data; the parser may deliver text in pieces."""
        self.buffer += data

    def endElement(self, name):
        """Store the collected text under the name of the element being closed."""
        self.mapping[name] = self.buffer

    def getDict(self):
        """Return the ``{element name: text}`` mapping built so far."""
        return self.mapping

def main():
    """Print a marker line; placeholder entry point for running this module directly."""
    print("main")


if __name__ == "__main__":
    # Reached only when the file is executed as a script, not when imported.
    print("self run!")
    main()

 

 

你可能感兴趣的:(python)