python脚本:自动检测rrd文件并群发报警邮件

脚本背景:

我所在的公司是运营CDN业务的IDC公司,客户域名的流量图经常会出现毛刺,但是服务的域名非常多,每天挨个查看流量图耗时耗力。因此用python写了个可以自动检测rrd文件里的异常数值并发送报警邮件的脚本。


由于我们的rrd文件是以服务域名命名的,所以先在相应的API上获取服务域名,然后根据域名扫描rrd文件。我设的是扫描半小时的数值,每10分钟执行一次,大概有2000来个rrd文件,执行一次6、7秒左右。


代码如下:


#!/usr/bin/env python
#coding:utf-8
from pyrrd.graph import DEF,CDEF,AREA
from pyrrd.graph import Graph
from pyrrd.graph import ColorAttributes
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
from datetime import datetime
import calendar
import os
import time
import urllib2
import smtplib
import email
import sys
def graphrrd(files):
    """Render a traffic graph for one RRD file and write it to ``image_dir``.

    ``files`` is the path of the RRD file. Its ``RX`` and ``TX`` data sources
    are each multiplied by 0.026 and drawn as filled areas covering the last
    43200 seconds (12 hours) up to the current UTC time. The picture is
    written to the module-level ``image_dir`` path; nothing is returned.
    """
    now_utc = calendar.timegm(datetime.utcnow().utctimetuple())

    def1 = DEF(rrdfile=files, vname='back', dsName='RX')
    def2 = DEF(rrdfile=files, vname='CDN', dsName='TX')
    # NOTE(review): the meaning of the 0.026 scale factor is not visible in
    # this file -- confirm the intended unit before changing it.
    cdef1 = CDEF(vname='back_flow', rpn='%s,0.026,*' % def1.vname)
    cdef2 = CDEF(vname='CDN_flow', rpn='%s,0.026,*' % def2.vname)
    area1 = AREA(defObj=cdef1, color='#002A97FF', legend='back_flow')
    area2 = AREA(defObj=cdef2, color='#00CF00FF', legend='CDN_flow')

    ca = ColorAttributes()
    ca.back = '#333333'
    ca.canvas = '#333333'
    ca.shadea = '#000000'
    ca.shadeb = '#111111'
    ca.mgrid = '#CCCCCC'
    ca.axis = '#FFFFFF'
    ca.frame = '#AAAAAA'
    ca.font = '#FFFFFF'
    ca.arrow = '#FFFFFF'

    # Use the file name without directory and extension as the title instead
    # of a fixed character offset, so the title stays correct when the RRD
    # directory changes.
    title_url = os.path.splitext(os.path.basename(files))[0]

    g = Graph(image_dir, start=now_utc - 43200, end=now_utc,
              vertical_label='flow', title=title_url)
    # The palette above was previously built but never attached to the graph.
    g.color = ca
    # area2 is added before area1, so area1 is drawn on top of it.
    g.data.extend([def1, def2, cdef1, cdef2, area2, area1])
    g.write()
def connect():
    """Open an SMTP session to ``smtpserver``, log in, and return it.

    The host and the credentials are read from the module-level
    ``smtpserver``, ``smtpuser`` and ``smtppass`` settings.
    """
    session = smtplib.SMTP(smtpserver)
    session.ehlo()
    session.login(smtpuser, smtppass)
    return session
def sendmessage(server,to,subj,content):
    """Send one HTML alert mail with the rendered graph embedded inline.

    server  -- object with a ``sendmail(from, to, message)`` method, such as
               the session returned by ``connect()``
    to      -- recipient address
    subj    -- subject line
    content -- HTML body; it can reference the attached picture through
               ``cid:image1``

    The sender is the module-level ``smtpuser`` and the picture is read from
    the module-level ``image_dir`` path. A failure inside ``sendmail`` is
    printed and swallowed so the caller can carry on; an unreadable image
    file still raises.
    """
    # email.utils is the supported spelling; email.Utils is a legacy alias.
    from email.utils import formatdate

    msg = MIMEMultipart('related')
    msg['Subject'] = subj
    msg['From'] = smtpuser
    msg['To'] = to
    msg['Date'] = formatdate()
    msg.attach(MIMEText(content, "html", "utf-8"))

    # The with-block closes the file even when reading or parsing fails.
    with open(image_dir, 'rb') as fp:
        msgImage = MIMEImage(fp.read())
    # The Content-ID must match the "cid:image1" reference in the HTML body.
    msgImage.add_header('Content-ID', '<image1>')
    msg.attach(msgImage)

    try:
        server.sendmail(smtpuser, to, msg.as_string())
    except Exception as ex:
        # Best effort: report the failure but do not abort the whole scan.
        print('%s %s' % (Exception, ex))
        print('Error - send failed')
def aver(rrd_file,n=6):
    """Load the newest rows of an RRD file and return their column averages.

    Runs ``rrdtool fetch <rrd_file> AVERAGE -s -1d``, keeps the last ``n``
    output lines, and drops every line containing ``nan`` or ``RX`` (the
    header line names the data sources). Each remaining line longer than 25
    characters is stored in the module-level ``dict_data`` as
    ``{first 10 characters: [value strings]}``.

    Returns ``[avg1, avg2, avg3]`` for the first three value columns of the
    rows held in ``dict_data``, or ``[]`` (after writing a log line) when
    fewer than ``n // 2`` lines remain, when no row can be parsed, or when
    the second column's average is below 3500000000.
    """
    global dict_data
    import subprocess

    # Pass an argument list instead of a shell string so that unusual
    # characters in the file name cannot alter the command.
    try:
        proc = subprocess.Popen(
            ['rrdtool', 'fetch', rrd_file, 'AVERAGE', '-s', '-1d'],
            stdout=subprocess.PIPE, universal_newlines=True)
        output = proc.communicate()[0]
    except OSError:
        # rrdtool could not be started; handled below as "no records".
        output = ''

    # Equivalent of "tail -n | grep -v nan | grep -v RX".
    tail = output.splitlines(True)[-n:] if n > 0 else []
    data = [line for line in tail if 'nan' not in line and 'RX' not in line]
    if len(data) < (n // 2):
        log("[ERRORS: %s] has not enough record ! please check it!!\n" % rrd_file)
        return []

    for line in data:
        if len(line) > 25:
            # The first 10 characters are the key; the values start at offset 12.
            dict_data[line[:10]] = line.strip()[12:].split()

    totals = [0.0, 0.0, 0.0]
    counted = 0
    for values in dict_data.values():
        try:
            parsed = [float(values[0]), float(values[1]), float(values[2])]
        except (ValueError, IndexError):
            # Malformed row: record it and leave it out of the averages.
            log('%s %s\n' % (rrd_file, values))
            continue
        totals = [total + value for total, value in zip(totals, parsed)]
        counted += 1

    if counted == 0:
        log("[ERRORS: %s] has not enough record ! please check it!!\n" % rrd_file)
        return []

    # Divide by the number of rows actually summed, not by the raw line count.
    averages = [total / counted for total in totals]
    # NOTE(review): the message says 200M but the comparison uses the raw
    # value 3500000000 -- confirm which one is intended.
    if averages[1] < 3500000000:
        log('WARNING: %s was less then 200M\n' % rrd_file)
        return []
    return averages
def check(average):
    """Return the ``dict_data`` keys whose second value exceeds 1.6x ``average``.

    ``dict_data`` is the module-level mapping filled by ``aver()``; the
    second entry of every row is converted to float before the comparison.
    """
    return [stamp for stamp in dict_data
            if float(dict_data[stamp][1]) / average > 1.6]
                          
def update(rrd_file,t,aver1,aver2,aver3):
    """Record one abnormal sample: log it and append it to the mail body.

    rrd_file -- path of the RRD file the sample belongs to
    t        -- key of the abnormal row in the module-level ``dict_data``
                (an epoch timestamp string, as it is handed to ``date``)
    aver1, aver2, aver3 -- accepted for interface compatibility; not used

    Appends an HTML fragment describing the sample to the module-level
    ``text`` and writes a one-line summary through ``write_error()``.
    """
    global text
    # Derive the name from the file name itself rather than from a fixed
    # character offset, so it stays correct if the RRD directory changes.
    domain = os.path.splitext(os.path.basename(rrd_file))[0]
    row = dict_data[t]
    # Human-readable form of the timestamp, produced by the system ``date``.
    errors_time = os.popen('date -d "1970-01-01 UTC %s seconds"' % t).readline().strip()
    back_flow = float(row[0])*8/300000000
    cdn_flow = int(float(row[1])*8/300000000)
    content = '<br/><br/>%s 异常信息:<br/>&nbsp;&nbsp;&nbsp; 域名:&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; %s <br/>&nbsp;&nbsp;&nbsp; 时间:&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; %s<br/>&nbsp;&nbsp;&nbsp; 流量值:&nbsp;&nbsp;&nbsp;&nbsp; 回源带宽: %.2fM , cdn带宽 : %dM <br/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <br/>rrd 异常信息:<br/>&nbsp;&nbsp;&nbsp; 路径:&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; %s<br/>&nbsp;&nbsp;&nbsp; UTC 时间:&nbsp;&nbsp;&nbsp; %s<br/>&nbsp;&nbsp;&nbsp; 异常值:&nbsp;&nbsp;&nbsp;&nbsp; [%s], [%s], [%s]<br/><br/><img src="cid:image1">' % (domain,domain,errors_time,back_flow,cdn_flow,rrd_file,t,row[0],row[1],row[2])
    write_error('[ %s ]: at[ %s(%s) ],the value was [%s] [%s] [%s] \n' %(rrd_file,errors_time,t,row[0],row[1],row[2]))
    text = text + content
def log(log_write):
    """Append ``log_write`` verbatim to ``rrd_alt1.log`` in the ``rrd_bak`` directory."""
    log_path = '%s/rrd_alt1.log' % rrd_bak
    with open(log_path, 'a') as handle:
        handle.write(log_write)
def write_error(log_write):
    """Append ``log_write`` verbatim to ``rrd_error1.log`` in the ``rrd_bak`` directory."""
    # The with-block guarantees the file is closed; the previous code only
    # referenced ``f.close`` without calling it.
    with open('%s/rrd_error1.log' % rrd_bak, 'a') as f:
        f.write(log_write)
                  
def run_script(rrd_file):
    """Scan one RRD file and mail an alert to every recipient if it spikes.

    Steps: compute the averages with ``aver()``, collect abnormal rows with
    ``check()``, describe each one with ``update()`` (which fills the
    module-level ``text``), render the graph with ``graphrrd()``, then send
    ``text`` to every address in the module-level ``to_all``.

    Returns ``None`` in all cases; it returns early when ``aver()`` yields no
    averages or when no abnormal row is found.
    """
    aver_rrd = aver(rrd_file)
    if not aver_rrd:
        return
    wrong_time = check(aver_rrd[1])
    if not wrong_time:
        log('[%s] no errors !\n' % (rrd_file))
        return
    for t in wrong_time:
        update(rrd_file, t, aver_rrd[0], aver_rrd[1], aver_rrd[2])
    graphrrd(rrd_file)
    if not text:
        return
    for to in to_all:
        server = connect()
        try:
            sendmessage(server, to, subj, text)
        finally:
            # Close every session we open; previously they were left open.
            try:
                server.quit()
            except (smtplib.SMTPException, EnvironmentError):
                # The mail attempt is already over; a failed goodbye on a
                # dropped connection must not abort the remaining recipients.
                pass
        log('sendmail to %s\n' % to)
if __name__=='__main__':
    # Script entry point: fetch the list of names, then scan the RRD file of
    # each name once. Settings below are module-level names read by the
    # functions above.
    image_time=time.strftime("%d-%H-%M")
    rrd_dir='/data/rrd/db/1/billing'
    rrd_bak='/data/rrd/db/1/billing/bak'
    smtpserver='xxx'
    image_dir='%s/rrdgraph_%s.png' % (rrd_bak,image_time)
    smtpuser='xxx'
    smtppass='yyy'
    to_all=['xxx','yyy']
    subj='check the flow of CDN!!!!'

    # The original wrapped this pass in "while True:" with a trailing
    # "break" that was indented outside the loop (a SyntaxError). The loop
    # only ever ran once, so the pass is written as straight-line code.
    local_time = time.strftime("%m-%d %H:%M:%S")
    response = urllib2.urlopen('xxx')
    try:
        names = response.readlines()
    finally:
        response.close()
    # One RRD path per line returned by the API.
    url_list = ["%s/%s.rrd" % (rrd_dir, u.strip()) for u in names]

    log("-"*60+"\n")
    log("the script run time at %s \n" % local_time)
    while url_list:
        # Reset the per-file state shared with aver()/check()/update().
        text=''
        dict_data={}
        rrd_file = url_list.pop()
        if os.path.exists(rrd_file):
            run_script(rrd_file)
    log("-"*60+"\n")

邮件截图

173433350.jpg


本文出自 “哲就是我” 博客,谢绝转载!

你可能感兴趣的:(python,邮件报警,检测流量图异常)