使用python调用pinpoint接口实现钉钉报警

 使用python调用pinpoint接口实现钉钉报警。

1、当pinpoint上监控的应用出现错误的调用,且错误次数>5(可以自己设置阈值大小,默认是5)时,即可报警。

2、只需更改脚本中的webhook和PPURL,并放入计划任务,比如每三分钟执行一次。

3、脚本运行时,会检查最近五分钟内应用的错误调用数,超过阈值就会报警。注意:如果计划任务的执行间隔小于五分钟(例如上面的每三分钟一次),相邻两次检查的时间窗口会重叠,同一批错误可能被重复报警。

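4、环境:python3.6(也可支持python2,但在python2下需要在脚本的第一行或第二行加上 `# -*- coding: utf-8 -*-` 编码声明,否则源码中的中文会导致语法错误)。脚本使用了 os.fork,因此只能在 Linux/Unix 上运行;另外需要先安装 requests 和 dingtalkchatbot 两个第三方库。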

 

#!/usr/local/bin/python
#Author:zzx
#功能:调用pinpoint接口,监控每个应用的调用错误数,并将告警信息发送到钉钉。
import sys
import os
import requests
import time
import datetime
import json

from dingtalkchatbot.chatbot import DingtalkChatbot #pip install dingtalkchatbot



# DingTalk robot webhook URL and Pinpoint web base URL; edit both before use.
webhook = "你的钉钉webhook"
PPURL = "http://your_pinpoint_ip:port"

# Query window: the five minutes ending "now".
# The clock is read exactly once so that both ends of the window come from
# the same instant and are always 300 seconds apart. Working from the epoch
# value (instead of subtracting from a naive local datetime and converting
# back with mktime) also keeps the window correct across a DST change.
_WINDOW_SECONDS = 300
_NOW_SECONDS = int(time.time())

# Naive local datetimes for the two ends of the window.
From_Time = datetime.datetime.fromtimestamp(_NOW_SECONDS - _WINDOW_SECONDS)
To_Time = datetime.datetime.fromtimestamp(_NOW_SECONDS)

# The same two instants as epoch milliseconds, which is what the script
# passes as the 'from' / 'to' query parameters.
From_TimeStamp = (_NOW_SECONDS - _WINDOW_SECONDS) * 1000
To_TimeStamp = _NOW_SECONDS * 1000


"""获取pinpoint中所有服务的基础信息,包括服务名,服务类型等"""
def get_applications(timeout=10):
    """Fetch the application list from the Pinpoint web API.

    Sends a GET request to ``PPURL + "/applications.pinpoint"``.

    :param timeout: seconds to wait for the server before ``requests``
        raises a timeout error. Without a timeout a stalled server would
        block this script forever.
    :return: the decoded JSON body on HTTP 200. The script's main section
        iterates over it and reads ``applicationName`` and ``serviceType``
        from each entry. Returns ``None`` (after printing a message) for any
        other status code.
    """
    applicationListUrl = PPURL + "/applications.pinpoint"
    res = requests.get(applicationListUrl, timeout=timeout)
    if res.status_code != 200:
        print("请求异常,请检查")
        return None
    return res.json()


'''传入服务名,返回该服务的节点数和各节点的节点名'''
def getAgentList(appname, timeout=10):
    """Return the number of agents of an application and their names.

    Sends a GET request to ``PPURL + "/getAgentList.pinpoint"`` with the
    application name as the ``application`` query parameter.

    :param appname: application name to look up.
    :param timeout: seconds to wait for the server before ``requests``
        raises a timeout error, so a stalled server cannot hang the script.
    :return: on HTTP 200, a tuple ``(count, names_json)`` where ``count`` is
        the number of top-level keys in the JSON response and ``names_json``
        is those keys serialised as a JSON array string. Returns ``None``
        (after printing a message) for any other status code.
    """
    AgentListUrl = PPURL + "/getAgentList.pinpoint"
    param = {
        'application': appname
    }
    res = requests.get(AgentListUrl, params=param, timeout=timeout)
    if res.status_code != 200:
        print("请求异常,请检查")
        return None
    # Decode the body once and reuse the key list for both return values.
    agent_names = list(res.json().keys())
    return len(agent_names), json.dumps(agent_names)



'''获取调用失败次数'''
def update_servermap(appname, from_time=From_TimeStamp, to_time=To_TimeStamp, serviceType='TOMCAT', timeout=10):
    """Return the total error count over an application's server-map links.

    Queries ``PPURL + "/getServerMapData.pinpoint"`` for the given
    application and time window, then adds up ``errorCount`` over every
    entry of ``applicationMapData.linkDataArray`` in the response.
    Links whose source application name starts with ``test`` are skipped.

    Example request (host redacted):
    .../getServerMapData.pinpoint?applicationName=test-app&from=1547721493000
    &to=1547721553000&callerRange=1&calleeRange=1&serviceTypeName=TOMCAT

    :param appname: application name.
    :param from_time: start of the window, sent as the ``from`` parameter
        (the module default is an epoch-milliseconds value).
    :param to_time: end of the window, sent as the ``to`` parameter.
    :param serviceType: application type, sent as ``serviceTypeName``.
    :param timeout: seconds to wait for the server before ``requests``
        raises a timeout error, so a stalled server cannot hang the script.
    :return: the summed error count on HTTP 200, or ``None`` (after
        printing a message) for any other status code.
    """
    param = {
        'applicationName': appname,
        'from': from_time,
        'to': to_time,
        'callerRange': 1,
        'calleeRange': 1,
        'serviceTypeName': serviceType
    }

    serverMapUrl = "{}{}".format(PPURL, "/getServerMapData.pinpoint")
    res = requests.get(serverMapUrl, params=param, timeout=timeout)
    if res.status_code != 200:
        print("请求异常,请检查")
        return None

    # Each link describes calls from one application to another and carries
    # its own errorCount; only that counter is used here.
    links = res.json()["applicationMapData"]["linkDataArray"]
    return sum(
        link['errorCount']
        for link in links
        # Exclude links that originate from test applications.
        if not link['sourceInfo']['applicationName'].startswith('test')
    )



'''原生钉钉报警,此脚本中没用到'''
def messages(application_name, service_type, error_count, timeout=10):
    """Send a plain-text alert straight to the DingTalk webhook.

    This is the "raw" alternative to the DingtalkChatbot client and is not
    called by the script's main section.

    The alert is posted exactly once; the response body is printed so the
    operator can see what the webhook answered.

    :param application_name: name of the application the alert is about.
    :param service_type: service type of that application.
    :param error_count: error count to report in the message.
    :param timeout: seconds to wait for the webhook before ``requests``
        raises a timeout error.
    :return: ``None``.
    """
    headers = {'Content-Type': 'application/json;charset=utf-8'}
    text="""
报警策略:ERROR COUNT
报警内容:ERROR COUNT value is {error_count} during the past 5 mins.
服务类型:{service_type}
   服务名:{application_name}
""".format(error_count=error_count,service_type=service_type,application_name=application_name)
    # Payload of msgtype "text" that @-mentions one mobile number.
    text_info = {
        "msgtype": "text",
        "at": {
            "atMobiles": [
                "13728978429"
            ],
            "isAtAll": False
        },
        "text": {
            "content": text
        }
    }

    # Post once and reuse the response: issuing a second POST just to
    # discard its result would deliver the same alert twice.
    response = requests.post(webhook, json.dumps(text_info), headers=headers, timeout=timeout)
    print(response.content)




 
     

if __name__ == "__main__":
    import traceback

    # Alert when an application's error count in the window exceeds this
    # value; adjust to taste.
    ERROR_THRESHOLD = 5

    # DingTalk robot client and the mobile numbers to @-mention in alerts.
    xiaoding = DingtalkChatbot(webhook)
    at_mobiles=['13728978429']

    # Entries are read below for their 'applicationName' and 'serviceType'.
    applicationLists=get_applications()
    if applicationLists is None:
        # get_applications() has already printed the reason; stop with a
        # failing status instead of crashing on iteration over None.
        sys.exit(1)

    # To debug update_servermap for a single application:
    #count=update_servermap('push-base', from_time=From_TimeStamp,to_time=To_TimeStamp,serviceType='TOMCAT')
    #print(count)

    # Check every application concurrently, one forked child per application.
    child_pids = []
    for app in applicationLists:
        application_name = app['applicationName']
        service_type = app['serviceType']
        pid = os.fork()
        if pid != 0:
            # Parent: remember the child and move on to the next application.
            child_pids.append(pid)
            continue

        # Child: query one application, alert if needed, then always exit so
        # the child can never fall through and continue the parent's loop.
        exit_code = 0
        try:
            error_count = update_servermap(application_name, from_time=From_TimeStamp,to_time=To_TimeStamp, serviceType=service_type)
            # update_servermap returns None when the request failed; there is
            # nothing to compare in that case.
            if error_count is not None and error_count > ERROR_THRESHOLD:
                text = """
pinpoint报警\n\n
> 报警策略:ERROR COUNT\n\n
> 报警内容:ERROR COUNT value is {error_count} during the past 5 mins.\n\n
> 服务类型:{service_type}\n\n
>  服务名:{application_name}
""".format(error_count=error_count, service_type=service_type, application_name=application_name)
                xiaoding.send_markdown(title='pp报警', text=text,at_mobiles=at_mobiles)
        except Exception:
            traceback.print_exc()
            exit_code = 1
        finally:
            # os._exit skips interpreter cleanup, so flush output by hand.
            sys.stdout.flush()
            sys.stderr.flush()
            os._exit(exit_code)

    # Reap every child so none is left behind as a zombie.
    for pid in child_pids:
        os.waitpid(pid, 0)

希望帮到大家。有什么需要交流的加本人qq:1074060710

参考文章:

https://yq.aliyun.com/articles/690351

你可能感兴趣的:(使用python调用pinpoint接口实现钉钉报警)