CM监控及短信告警

需要安装 cm-api 这个 Python 依赖：https://cloudera.github.io/cm_api/docs/python-client/
cm-api 相关 API 文档：https://cloudera.github.io/cm_api/apidocs/v5.15.0/index.html
请注意：代码中使用的 API 版本（version 参数）需要与 CM 的版本相匹配。

# -*- coding:utf-8 -*-
import logging
import os
import sys

from cm_api.api_client import ApiResource

from Sms import *

# Connection settings for the Cloudera Manager (CM) API client.
# Every value can be overridden through an environment variable so that real
# credentials do not have to be written into this file; when a variable is
# not set, the original hard-coded value is used.
# NOTE(review): "{cm_host}" looks like a template placeholder -- replace it
# (or set CM_HOST) with the real CM hostname.
cm_host = os.environ.get('CM_HOST', "{cm_host}")
username = os.environ.get('CM_USERNAME', 'admin')
password = os.environ.get('CM_PASSWORD', 'admin')
# API version number handed to ApiResource in check().
version = int(os.environ.get('CM_API_VERSION', 12))

# Make UTF-8 the interpreter-wide default string encoding.
default_encoding = 'utf-8'
if default_encoding != sys.getdefaultencoding():
    # sys is reloaded first and only then asked to change the encoding
    # (presumably the usual Python 2 recipe -- reload() is not a builtin on
    # Python 3, where this branch is not expected to be taken).
    reload(sys)
    sys.setdefaultencoding(default_encoding)

# Configure the root logger so that INFO-and-above records go both to the
# console and to a log file named "<absolute path of this script>.log".
logger = logging.getLogger()
logger.setLevel(logging.INFO)
BASIC_FORMAT = "%(asctime)s:%(levelname)s:%(message)s"
DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
formatter = logging.Formatter(BASIC_FORMAT, DATE_FORMAT)

# Console output.
console = logging.StreamHandler()
console.setLevel(logging.INFO)
console.setFormatter(formatter)

# File output next to the script itself.
file_handler = logging.FileHandler("%s.log" % os.path.abspath(__file__))
file_handler.setFormatter(formatter)
file_handler.setLevel(logging.INFO)

logger.addHandler(file_handler)
logger.addHandler(console)



def check():
    """Query Cloudera Manager and build the alert text for unhealthy roles.

    Connects using the module-level ``cm_host``, ``username``, ``password``
    and ``version`` settings. Only the services of the first cluster returned
    by the API are inspected.

    Returns:
        The alert message (a header line followed by the per-service
        messages, joined with CRLF), or None when no problem was reported.
    """
    client = ApiResource(cm_host, username=username, password=password, version=version)
    hostnames = get_host_dict(client)
    first_cluster = client.get_all_clusters()[0]
    alerts = service_check(hostnames, first_cluster.get_all_services())
    if not alerts:
        return None
    return 'CDH集群告警\r\n' + '\r\n'.join(alerts)


def get_host_dict(api):
    """Build a lookup table from host id to hostname.

    Args:
        api: object exposing ``get_all_hosts()``, whose items carry
            ``hostId`` and ``hostname`` attributes.

    Returns:
        A dict mapping each host's ``hostId`` to its ``hostname``.
    """
    return {host.hostId: host.hostname for host in api.get_all_hosts()}


def service_check(host_dict, services):
    """Collect one alert message per service that has unhealthy roles.

    Args:
        host_dict: mapping of host id to hostname, forwarded to role_check.
        services: iterable of service objects; each needs a ``type``
            attribute and whatever role_check reads from it.

    Returns:
        A list of strings, one per service for which role_check returned
        messages. Each string is "<n>.<service type>" followed by the role
        messages, joined with CRLF. Services are numbered from 1 and only
        services that produce a message consume a number.
    """
    alerts = []
    for svc in services:
        # Number of the next reported service: one more than those already
        # collected, so healthy services do not leave gaps in the numbering.
        index = len(alerts) + 1
        role_lines = role_check(host_dict, index, svc)
        if not role_lines:
            continue
        alerts.append(''.join((str(index), '.', svc.type, '\r\n', '\r\n'.join(role_lines))))
    return alerts


def role_check(host_dict, service_num, service):
    """Collect one alert message per role of *service* with failing checks.

    Args:
        host_dict: mapping of host id to hostname, used to name the host a
            role runs on (looked up via ``role.hostRef.hostId``).
        service_num: number of the enclosing service, used as a prefix.
        service: object exposing ``get_all_roles()``.

    Returns:
        A list of strings, one per role for which health_check returned
        messages. Each string is
        "<service_num>.<n> <role type>/<hostname>:" followed by the health
        check messages, joined with CRLF. Roles are numbered from 1 and only
        roles that produce a message consume a number.
    """
    reports = []
    for role in service.get_all_roles():
        # Next free role number; healthy roles do not advance it.
        position = len(reports) + 1
        details = health_check(service_num, position, role)
        if not details:
            continue
        heading = ''.join((
            str(service_num), '.', str(position), ' ', role.type, '/',
            host_dict[role.hostRef.hostId], ':\r\n',
        ))
        reports.append(heading + '\r\n'.join(details))
    return reports


def health_check(service_num, role_num, role):
    """List the failing, non-suppressed health checks of a single role.

    A role is inspected only when its ``healthSummary`` is 'BAD' and it is
    not in maintenance mode; otherwise an empty list is returned.

    Args:
        service_num: number of the enclosing service (message prefix).
        role_num: number of the role within the service (message prefix).
        role: object with ``healthSummary``, ``maintenanceMode`` and
            ``healthChecks``; each health check is indexed by the keys
            'suppressed', 'summary' and 'name'.

    Returns:
        A list of strings shaped like
        "<service_num>.<role_num>.<n> <check name>:<check summary>", where
        n counts the reported checks from 1.
    """
    if role.healthSummary != 'BAD' or role.maintenanceMode:
        return []

    failing = [
        entry for entry in role.healthChecks
        if not entry['suppressed'] and entry['summary'] == 'BAD'
    ]
    prefix = str(service_num) + '.' + str(role_num) + '.'
    return [
        prefix + str(number) + ' ' + entry['name'] + ':' + entry['summary']
        for number, entry in enumerate(failing, 1)
    ]


# Script entry: run the health check once, log the outcome and, when
# something is wrong, forward the alert text by SMS.
try:
    res = check()

    if res:
        logger.info(res)
        # send_sms is not defined in this file; it is expected to come from
        # the Sms module -- implement the SMS gateway call there.
        send_sms(res)
    else:
        logger.info('safe!')
except Exception:
    # A failure to reach CM or to send the SMS would otherwise abort the
    # script without leaving any trace in the log file. Record the traceback,
    # then re-raise so the process still fails as before.
    logger.exception('CM health check or SMS alert failed')
    raise

你可能感兴趣的:(CM监控及短信告警)