#!/usr/bin/env python
# coding:utf-8
# 该脚本是作统计
from datetime import datetime, timedelta
import os
from dingtalkchatbot.chatbot import DingtalkChatbot
from kafka import KafkaConsumer
import re
# Alert log store: maps a status class (500 for 5XX, 400 for 4XX) to a dict
# of {alert key: occurrence count}.
messages = {500: {}, 400: {}}
# import logging as log
# log.basicConfig(level=log.DEBUG)
# 排序,输出顺序是先 5XX 再 4XX,并且按出现uri次数的顺序排序。
def sort_dict(stats=None):
    """Return the alert counters as nested, sorted lists without mutating them.

    Args:
        stats: Optional mapping of status class (e.g. 500, 400) to a dict of
            ``{alert_key: count}``. Defaults to the module-level ``messages``.

    Returns:
        A list of ``(status_class, [(alert_key, count), ...])`` tuples. Status
        classes are ordered descending (5XX before 4XX) and, inside each
        class, entries are ordered by count descending.
    """
    if stats is None:
        stats = messages
    # Build fresh lists instead of assigning back into the shared dict: the
    # consumer loop keeps updating ``messages[...]`` as a dict of counters, so
    # replacing its inner dicts with sorted lists would break later updates.
    ranked = [
        (status_class,
         sorted(counts.items(), key=lambda item: item[1], reverse=True))
        for status_class, counts in stats.items()
    ]
    ranked.sort(key=lambda entry: entry[0], reverse=True)
    return ranked
def send_mail():
    """Format the collected error counters and push them to the alert endpoint.

    The report contains one section per status class in the order returned by
    ``sort_dict`` (5XX before 4XX); each section is a ``*******<label>:``
    header followed by one ``<alert key> <count>`` line per entry.

    Returns:
        0 when the curl command exited with status 0, -1 when it exited with a
        non-zero status or when any exception was raised while building or
        sending the report.
    """
    # Imported locally so the function does not rely on a file-level import.
    import subprocess

    try:
        parts = []
        for status_class, entries in sort_dict():
            # Every class other than 500 is reported under the 4XX header.
            label = '5XX' if status_class == 500 else '4XX'
            parts.append('*******' + label + ':' + '\n')
            for key, count in entries:
                parts.append(key + ' ' + str(count) + '\n')
        message = ''.join(parts)

        # DingTalk alert interface supplied by the dev team.
        # NOTE(review): 'url' looks like a placeholder for the real endpoint.
        # The report is built from log content (request URIs, app ids), so it
        # is passed as a single argv element rather than through a shell
        # string; quotes, backticks or "$(...)" in a URI cannot run commands.
        payload = 'business=gateway&content=%s' % message
        exit_code = subprocess.call(['curl', 'url', '-d', payload])
        print(exit_code)
        if exit_code != 0:
            # Report the failure so the caller keeps the counters and retries.
            now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print('%s curl exited with status %s' % (now, exit_code))
            return -1
        # Alternative transport: a DingTalk robot via
        # DingtalkChatbot(posturl).send_text(msg=message).
        print('successfully sent!')
        # 0 is the success marker the caller checks for.
        return 0
    except Exception as e:
        now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print('nnnnnnnn', now, e)
        return -1
def tactics(time1, send_time, delta):
    """Apply the alert policy and report when the counters were last cleared.

    Args:
        time1: Time at which the current error record was received.
        send_time: Time of the last counter reset (or the start time).
        delta: Interval that must be exceeded before an alert is attempted.

    Returns:
        ``time1`` when an alert was sent successfully (the global ``messages``
        counters are reset in that case), otherwise ``send_time`` unchanged.
    """
    global messages
    elapsed = time1 - send_time
    if elapsed <= delta:
        # Still inside the current counting window: nothing to do.
        return send_time
    # The window has been exceeded, so try to raise the alert.
    print('time1 - send_time > delta')
    outcome = send_mail()
    print(outcome)
    if outcome != 0:
        # Sending failed: keep the counters and the old reset time.
        return send_time
    # Alert delivered: start a fresh counting window from this record.
    messages = {500: {}, 400: {}}
    print('clean messages')
    return time1
# 对消息进行字段过滤
def _extract_field(name, msg, default=''):
    """Return the first value of the ``"name": "value",`` pair in *msg*, or *default*."""
    # ``.*?`` (not ``.+?``) so an empty value yields '' instead of letting the
    # match run on into the following fields.
    found = re.search(r'"%s": "(.*?)",' % name, msg)
    if found is None:
        return default
    return found.group(1)


def filter(msg):
    """Condense one log record into a single alert key.

    Args:
        msg: Log line containing ``"field": "value",`` pairs.

    Returns:
        A one-element list holding
        ``"<upstream_status> <app id> <server_name><request URI>"``. The app-id
        column is ``" -- "`` when ``http_x_app_id`` is absent or empty. A field
        that cannot be found, or a request without a second space-separated
        token, contributes an empty string instead of raising ``IndexError``.
    """
    status = _extract_field('upstream_status', msg)
    server = _extract_field('server_name', msg)
    # The URI is the second space-separated token of the request value.
    request_tokens = _extract_field('request', msg).split(' ')
    uri = request_tokens[1] if len(request_tokens) > 1 else ''
    # Records without an app id show ' -- ' in that column.
    app_id = _extract_field('http_x_app_id', msg) or ' -- '
    return [status + ' ' + app_id + ' ' + server + uri]
def _handle_record(raw, received_at, send_time, delta):
    """Count one record if its status is >= 400 and run the alert policy.

    Returns the (possibly updated) time of the last counter reset. Raises
    ``ValueError`` when ``upstream_status`` is missing or not an integer.
    """
    status = re.findall(r'\"upstream_status\": \"(.+?)\",', raw)
    if not status:
        raise ValueError('upstream_status not found')
    if status[0] == '-':
        # A '-' status is only printed, never counted.
        print(status)
        return send_time
    code = int(status[0])
    if code < 400:
        print('ok %s' % code)
        return send_time
    # Reduce the record to its alert key and count it under 5XX or 4XX.
    key = filter(raw)[0]
    print(key)
    bucket = messages[500] if code > 499 else messages[400]
    bucket[key] = bucket.get(key, 0) + 1
    # The alert policy is evaluated only when an error record arrives.
    return tactics(received_at, send_time, delta)


def kafka_cli(bootstrap_servers, source_topic):
    """Consume log records from Kafka forever and count 4XX/5XX responses.

    Args:
        bootstrap_servers: Broker address(es) passed to ``KafkaConsumer``.
        source_topic: Topic to read log records from.

    This function does not return. A record that cannot be parsed is logged
    and skipped; any other failure closes the consumer (if one was created)
    and a new one is opened after a short pause.
    """
    # Imported locally so the function does not rely on a file-level import.
    import time

    # Start of the first counting window.
    send_time = datetime.now()
    print('send_time %s' % send_time)
    # Interval between alerts.
    delta = timedelta(hours=1)

    while True:
        # Reset on every attempt so a failed constructor is never followed by
        # close() on an unbound or already-closed consumer.
        consumer = None
        try:
            consumer = KafkaConsumer(source_topic, bootstrap_servers=bootstrap_servers)
            for msg in consumer:
                # Time at which this record was received.
                received_at = datetime.now()
                # Strip backslashes so the field patterns match escaped records.
                raw = msg.value.replace('\\', '')
                try:
                    send_time = _handle_record(raw, received_at, send_time, delta)
                except (IndexError, ValueError) as e:
                    # One malformed record must not force a reconnect.
                    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                    print('%s skipped malformed record: %s' % (now, e))
        except Exception as e:
            now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print(now, e)
        finally:
            if consumer is not None:
                try:
                    consumer.close()
                except Exception as e:
                    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                    print('%s failed to close consumer: %s' % (now, e))
        # Brief pause so an unreachable broker does not cause a busy loop.
        time.sleep(1)
def main():
    """Run the Kafka consumer loop against the hard-coded broker and topic."""
    kafka_cli(bootstrap_servers='kafkaip:9092', source_topic='kibana')


# Script entry point.
if __name__ == '__main__':
    main()