python实现对kafka中日志数据监控报告(钉钉)

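#!/usr/bin/env python
# coding:utf-8
# This script consumes log lines from Kafka, counts 4XX/5XX upstream errors,
# and reports the counts to a DingTalk alert endpoint.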

from datetime import datetime, timedelta
import os
from dingtalkchatbot.chatbot import DingtalkChatbot
from kafka import KafkaConsumer
import re

# Alert counters: status class (500 or 400) -> {summary key: occurrence count}.
messages = {status_class: {} for status_class in (500, 400)}


# import logging as log
# log.basicConfig(level=log.DEBUG)

def sort_dict():
    """Return the collected error counters as a sorted, report-ready list.

    Reads the module-level ``messages`` dict (status class -> {key: count})
    and leaves it untouched.

    Returns:
        A list of ``(status_class, entries)`` tuples ordered by status class
        descending (500 before 400). ``entries`` is a list of
        ``(key, count)`` tuples ordered by count descending.
    """
    # Build fresh lists rather than assigning the sorted result back into
    # ``messages``: the counters must stay dicts so that they can still be
    # incremented (and sorted again) if the report fails to send.
    ranked = [
        (status_class,
         sorted(counts.items(), key=lambda item: item[1], reverse=True))
        for status_class, counts in messages.items()
    ]
    return sorted(ranked, key=lambda group: group[0], reverse=True)


def send_mail():
    """Format the collected error counters and post them to the alert endpoint.

    The report contains one header line per status class (``5XX`` for the
    500 group, ``4XX`` otherwise) in the order returned by ``sort_dict()``,
    each followed by one ``"<key>    <count>"`` line per entry.

    Returns:
        0 when curl exited with status 0; -1 when curl reported a failure or
        any exception was raised while building or sending the report.
    """
    # Local import so this function does not depend on the file's import block.
    import subprocess

    try:
        lines = []
        for status_class, entries in sort_dict():
            label = '5XX' if status_class == 500 else '4XX'
            lines.append('*******' + label + ':' + '\n')
            for key, count in entries:
                lines.append(key + '    ' + str(count) + '\n')
        message = ''.join(lines)

        # DingTalk alert endpoint provided by the developers ('url' is the
        # endpoint placeholder). The report text comes from log lines, so it
        # is passed as a separate argument (no shell involved) and
        # URL-encoded: quotes, '&' or '=' in a request URI can then neither
        # inject shell commands nor split the form field.
        exit_code = subprocess.call([
            'curl', 'url',
            '-d', 'business=gateway',
            '--data-urlencode', 'content=' + message,
        ])
        print(exit_code)
        if exit_code != 0:
            # Report failure so the caller keeps the counters and retries.
            return -1
        # Alternatively, call the DingTalk robot through the chatbot module:
        # posturl = "https://oapi.dingtalk.com/robot/send?access_token=" \
        #         "378cc60b9306b89e53d71cecccfe70**********************"
        # xiaoding = DingtalkChatbot(posturl)
        # xiaoding.send_text(msg=message)
        print('successfully sent!')
        # 0 is the success marker checked by the caller.
        return 0
    except Exception as e:
        now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('nnnnnnnn %s %s' % (now, e))
        return -1


def tactics(time1, send_time, delta):
    """Send the accumulated report once more than ``delta`` has elapsed.

    Args:
        time1: time at which the current error log line was received.
        send_time: time the counters were last cleared (initially the
            start-up time).
        delta: interval that must be exceeded before a report is sent.

    Returns:
        ``time1`` when a report was sent successfully (the module-level
        ``messages`` counters are reset in that case); otherwise
        ``send_time`` unchanged.
    """
    global messages
    if time1 - send_time <= delta:
        return send_time

    print('time1 - send_time > delta')
    result = send_mail()
    print(result)
    if result != 0:
        # Sending failed: keep the counters and the old timestamp so the
        # report is attempted again on the next error line.
        return send_time

    # Report delivered: start a new counting window from this moment.
    messages = {500: {}, 400: {}}
    print('clean messages')
    return time1

def filter(msg):
    """Build the one-line summary key used to count an error log line.

    Fields are extracted from ``msg`` by matching ``"name": "value",`` pairs.

    NOTE: this name shadows the builtin ``filter``; it is kept because
    callers use it.

    Args:
        msg: text of one log line.

    Returns:
        A single-element list holding
        ``"<upstream_status>   <http_x_app_id>   <server_name><path>"``,
        where ``<path>`` is the second space-separated token of the
        ``request`` value. A missing app id is rendered as ``' --  '``; any
        other missing field is rendered as ``'--'``; a ``request`` value
        without a space is used as-is.
    """
    def field(name, default):
        # First captured value for ``name``, or ``default`` when absent.
        found = re.findall(r'\"%s\": \"(.+?)\",' % name, msg)
        return found[0] if found else default

    # Fall back instead of indexing blindly, so a line with an unexpected
    # request value is still counted rather than raising IndexError.
    request = field('request', '--')
    tokens = request.split(' ')
    path = tokens[1] if len(tokens) > 1 else request

    return [field('upstream_status', '--') + '   '
            + field('http_x_app_id', ' --  ') + '   '
            + field('server_name', '--') + path]


def kafka_cli(bootstrap_servers, source_topic):
    """Consume log lines from Kafka forever and count 4XX/5XX upstream errors.

    For every message the ``upstream_status`` field is extracted. Statuses
    of 400 and above are counted in the module-level ``messages`` dict
    (bucket 500 for status > 499, bucket 400 otherwise) under the key built
    by ``filter()``, and ``tactics()`` then decides whether the report is
    due. Any exception from the consumer is printed and the consumer is
    recreated.

    Args:
        bootstrap_servers: broker address(es) passed to ``KafkaConsumer``.
        source_topic: topic to subscribe to.

    Returns:
        Never returns normally (infinite loop).
    """
    # Start of the first counting window.
    send_time = datetime.now()
    print('send_time %s' % send_time)
    # Interval that must elapse between two reports.
    delta = timedelta(hours=1)
    status_re = re.compile(r'\"upstream_status\": \"(.+?)\",')

    while True:
        # Initialised before the try so the cleanup below never touches an
        # unbound name when the KafkaConsumer constructor itself fails.
        consumer = None
        try:
            consumer = KafkaConsumer(source_topic, bootstrap_servers=bootstrap_servers)
            for msg in consumer:
                # Time at which this log line was received.
                time1 = datetime.now()
                line = msg.value
                if line is None:
                    continue
                if not isinstance(line, str):
                    # Raw payloads arrive as bytes on Python 3.
                    line = line.decode('utf-8', 'replace')
                # Drop backslashes so escaped quotes match the field patterns.
                line = line.replace('\\', '')

                status = status_re.findall(line)
                if not status or status[0] == '-':
                    print(status)
                    continue
                try:
                    code = int(status[0])
                except ValueError:
                    # Status value is not a single integer: skip this line
                    # instead of tearing down the consumer.
                    print(status)
                    continue
                if code < 400:
                    print('ok %s' % code)
                    continue

                try:
                    key = filter(line)[0]
                except IndexError:
                    # Guard: a line missing expected fields is skipped.
                    print(status)
                    continue
                print(key)

                # ``messages`` is looked up on every iteration because
                # tactics() rebinds it after a successful report.
                bucket = messages[500 if code > 499 else 400]
                bucket[key] = bucket.get(key, 0) + 1
                # tactics() returns the (possibly updated) window start.
                send_time = tactics(time1, send_time, delta)
        except Exception as e:
            now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print('%s %s' % (now, e))
        finally:
            if consumer is not None:
                consumer.close()


def main():
    """Start consuming the 'kibana' topic from the broker at 'kafkaip:9092'."""
    kafka_cli('kafkaip:9092', 'kibana')


if __name__ == '__main__':
    main()

你可能感兴趣的:(python基础)