根据pod性能实现nacos自动上下线

目录

  • 逻辑
  • 完整代码

流程图(Created with Raphaël 2.3.0):

  1. 开始
  2. prometheus获取CPU高的Podname
  3. 判断是否满足阈值?(yes 则继续;no 则不做下线处理)
  4. 根据podname去k8s中获取pod IP
  5. 把IP放到字典中,sleep 15秒,15s后再跑一遍上边步骤
  6. 判断两次是否都满足?(yes 则继续;no 则不做下线处理)
  7. nacos 下线实例
  8. nacos 获取监控的实例信息
  9. 判断已下线的IP是否在下线字典中?(yes 则结束;no 则上线)
  10. 结束

最后还会判断是否有处于下线状态的实例,如果有,就通过飞书发送告警。

逻辑

  • 从prometheus获取pod的CPU状态
  • 根据获取到的pod名称到k8s中获取IP地址
  • 等待一段时间后再次执行前两步(文中按15s描述,下方代码中实际等待10秒)
  • 等待之后pod的CPU还是高,那就在nacos中下线该实例
  • 在nacos中按监控的服务名称查询实例信息,检查已经下线的实例是否在要下线的字典里
  • 已下线的实例如果不在字典里,就重新上线
  • 飞书发信息

完整代码

from kubernetes import client, config
import requests,json
import time

import urllib3
urllib3.disable_warnings()

# 1. Connect to the Kubernetes API and look up a pod's IP
def k8s(host, token, namespace, pod_name):
    """Return the IP address of pod ``pod_name`` in ``namespace``.

    A client configuration pointing at ``host`` and authenticating with the
    bearer ``token`` is built and installed as the default configuration,
    then the pod is read through ``CoreV1Api``.

    TLS certificate verification is switched off (``verify_ssl = False``).
    """
    cfg = client.Configuration()
    cfg.host = host
    cfg.verify_ssl = False
    cfg.debug = False
    cfg.api_key = {'authorization': "Bearer " + token}
    client.Configuration.set_default(cfg)

    core_api = client.CoreV1Api()
    return core_api.read_namespaced_pod(pod_name, namespace).status.pod_ip

# 2. Query Prometheus for the CPU usage of the k8s pods
def get_cpu_usage(url, formula, timeout=10):
    """Run an instant PromQL query and return the rounded value per pod.

    Args:
        url: Base URL of the Prometheus server; ``/api/v1/query`` is
            appended to it.
        formula: PromQL expression to evaluate.
        timeout: Seconds to wait for Prometheus before giving up.

    Returns:
        A dict mapping each series' ``pod`` label to its sample value
        rounded to the nearest integer. Series without a ``pod`` label
        are skipped.

    Raises:
        requests.HTTPError: if Prometheus answers with an error status.
    """
    # Hand the expression to requests via ``params`` so it is URL-encoded;
    # splicing it into the URL corrupts expressions containing characters
    # such as '+', '&', '#' or '%'.
    response = requests.get(
        f"{url}/api/v1/query",
        params={"query": formula},
        timeout=timeout,
    )
    response.raise_for_status()

    cpu_peaks = {}
    for series in response.json()["data"]["result"]:
        pod_name = series["metric"].get("pod")
        if pod_name is None:
            # Cannot be attributed to a pod, so there is nothing to act on.
            continue
        # An instant-vector sample is [timestamp, "value-as-string"].
        cpu_peaks[pod_name] = round(float(series["value"][1]))
    return cpu_peaks

# 3. Call nacos to bring a service instance online or take it offline
def nacos_up_down(url, query_params, timeout=10):
    """Send the instance update to nacos as a PUT request.

    Args:
        url: nacos instance endpoint.
        query_params: dict sent as the query string; the callers in this
            file put the target instance and its ``enabled`` flag in it.
        timeout: Seconds to wait for nacos, so an unresponsive server
            cannot block the script forever.

    Raises:
        requests.HTTPError: if nacos answers with an error status.
    """
    response = requests.put(url, params=query_params, timeout=timeout)
    response.raise_for_status()
    
    
# 4. Send an alert through a Feishu bot webhook
def send_feishu_message(url, message, timeout=10):
    """Post ``message`` as a text message to the webhook at ``url``.

    Args:
        url: Webhook URL of the Feishu bot.
        message: Text to send.
        timeout: Seconds to wait for the webhook, so a stalled endpoint
            cannot block the script forever.

    Returns:
        The raw response body as text. The HTTP status is not checked.
    """
    payload = {
        "msg_type": "text",
        "content": {"text": message},
    }
    headers = {'Content-Type': 'application/json;charset=utf-8'}
    response = requests.post(
        url,
        headers=headers,
        data=json.dumps(payload),
        timeout=timeout,
    )
    return response.text


# 4. Sample the CPU query twice and keep the pods reported both times
def monitor_cpu(api_server, expr, Judge, interval=10):
    """Return the monitored pods that appear in two consecutive samples.

    The function does not compare CPU values itself; it relies on ``expr``
    returning only the pods that are over the limit.

    Args:
        api_server: Prometheus base URL, passed to ``get_cpu_usage``.
        expr: PromQL expression, passed to ``get_cpu_usage``.
        Judge: list of ``{deployment_name: [nacos_service, nacos_port]}``
            dicts. A pod is attributed to an entry when the deployment
            name occurs in the pod name.
        interval: Seconds to wait between the two samples. The default
            keeps the previous hard-coded wait of 10 seconds.
            NOTE(review): the accompanying text describes a 15 second
            wait; confirm which value is intended.

    Returns:
        A list of ``[nacos_service, nacos_port, pod_name]`` entries, in
        first-sample order, for pods present in both samples.
    """
    first_sample = get_cpu_usage(api_server, expr)
    exceeded_pods = []
    for pod in first_sample:
        for judge_data in Judge:
            for k8s_deploy, nacos_data in judge_data.items():
                if k8s_deploy in pod:
                    exceeded_pods.append([nacos_data[0], nacos_data[1], pod])

    if not exceeded_pods:
        # Nothing to confirm: skip the wait and the second query.
        return []

    time.sleep(interval)
    second_sample = get_cpu_usage(api_server, expr)
    # Dict membership replaces the former nested scan over both samples.
    return [entry for entry in exceeded_pods if entry[2] in second_sample]

# 5. Build the records needed for the nacos operations
def nacos_data_list(K8s_host, K8s_token, K8s_ns, mon_CPU):
    """Replace the pod name in each ``mon_CPU`` entry with the pod's IP.

    Each entry of ``mon_CPU`` is indexed as ``[service, port, pod_name]``.
    The pod name is resolved through ``k8s`` using the given API host,
    token and namespace, and ``[service, port, pod_ip]`` is returned for
    every entry, in the same order.
    """
    def to_record(entry):
        # Resolve the IP first, then assemble the record.
        ip = k8s(
            host=K8s_host,
            token=K8s_token,
            namespace=K8s_ns,
            pod_name=entry[2],
        )
        return [entry[0], entry[1], ip]

    return [to_record(entry) for entry in mon_CPU]

# 6. Take instances offline in nacos
def nacos_down(NA_U, NA_I, NA_IF):
    """Disable every instance listed in ``NA_IF``.

    Args:
        NA_U: nacos instance endpoint URL.
        NA_I: template dict of request parameters. It is copied for each
            request and never modified, so the caller's dict keeps its
            original values.
        NA_IF: iterable of entries indexed as ``[service_name, port, ip]``.
    """
    for nacos_info in NA_IF:
        params = {
            **NA_I,
            'enabled': 'false',
            'serviceName': nacos_info[0],
            'ip': nacos_info[2],
            'port': nacos_info[1],
        }
        nacos_up_down(NA_U, params)
       
# 7. Query instance information from nacos
def nacos_list(NA_U, NA_I, NA_IF, timeout=10):
    """Fetch the instance listing of every service named in ``NA_IF``.

    Args:
        NA_U: nacos instance-listing endpoint URL.
        NA_I: template dict of query parameters. It is copied for each
            request and never modified.
        NA_IF: list of ``{deployment_name: [nacos_service, ...]}`` dicts;
            only the first element of each value (the service name) is
            used.
        timeout: Seconds to wait for each nacos request.

    Returns:
        A dict mapping service name to the decoded JSON response.

    Raises:
        requests.HTTPError: if nacos answers with an error status. This
            surfaces the failure at the request instead of later, when
            the body is decoded or read.
    """
    nacos_dict = {}
    for nacos_server in NA_IF:
        for nacos_data in nacos_server.values():
            service_name = nacos_data[0]
            params = {**NA_I, 'serviceName': service_name}
            response = requests.get(NA_U, params=params, timeout=timeout)
            response.raise_for_status()
            nacos_dict[service_name] = response.json()
    return nacos_dict

# 8. Bring instances back online in nacos
def nacos_up(NA_IQ, NA_IF, NA_I, NA_U):
    """Re-enable disabled instances that are not in the offline list.

    Args:
        NA_IQ: dict of service name -> nacos listing; each listing must
            have a ``list`` of instance dicts with ``enabled``, ``ip``,
            ``port`` and ``serviceName`` (formatted ``<prefix>@@<name>``).
        NA_IF: iterable of entries indexed as ``[service, port, ip]``
            describing the instances that should stay offline.
        NA_I: template dict of request parameters. It is copied for each
            request and never modified.
        NA_U: nacos instance endpoint URL.
    """
    # A set gives constant-time membership tests.
    offline_ips = {nacos_info[2] for nacos_info in NA_IF}

    for service_info in NA_IQ.values():
        for instance in service_info['list']:
            if instance.get("enabled") is not False:
                continue  # already online, or state unknown
            if instance.get("ip") in offline_ips:
                continue  # meant to stay offline
            params = {
                **NA_I,
                'enabled': 'true',
                'serviceName': instance.get("serviceName").split('@@')[1],
                'ip': instance.get("ip"),
                'port': instance.get("port"),
            }
            nacos_up_down(NA_U, params)

# 9. Inspect the nacos listings and alert through Feishu
def nacos_feishu(NA_IQ, WU):
    """Send one Feishu alert per service that has disabled instances.

    Args:
        NA_IQ: dict of service name -> nacos listing; each listing has a
            ``list`` of instance dicts and, for services that trigger an
            alert, a ``count`` field.
        WU: Feishu webhook URL.

    A service whose instance list is empty has nothing offline and is
    skipped. Previously that case raised ``UnboundLocalError`` because
    the message was built from names bound only inside the instance loop.
    """
    for service_info in NA_IQ.values():
        instances = service_info["list"]
        up = sum(1 for instance in instances if instance.get("enabled") is True)
        down = len(instances) - up
        if down <= 0:
            continue

        # down > 0 guarantees at least one instance; as before, the names
        # shown in the alert come from the last instance in the list.
        full_name = instances[-1].get("serviceName").split('@@')
        NS = full_name[0]
        server_name = full_name[1]
        count = service_info['count']
        message = f"\n环境:{NS}\n服务名:{server_name}\n总实例:{count}\n上线:{up}\n下线:{down}"
        send_feishu_message(WU, message)

    

if __name__ == "__main__":
    # Script entry point: configure the endpoints, then run one pass of
    # "detect busy pods -> take them offline -> re-enable the rest -> alert".

    # Shared settings
    namespace="test"
    # CPU threshold; interpolated into the PromQL expression below as the "> %s" bound
    cpu_threshold = 85

    # Format: [{k8s deployment name: [nacos service name, nacos service port]}]
    Judge=[
        {"aaaaaaaa-test-server":['a-test','8080']},
        {'bbbbbbbb-test-server':['b-test','8181']}
    ]

    # Kubernetes API server address and bearer token
    host = 'https://127.0.0.1:6443'
    token = 'xxxxxxxxxx'
    # k8s(host=host,token=token,namespace="test",pod_name="aaaaaaaa-test-server-7f9d77b899-hgdnc")

    # Prometheus server address and the PromQL expression; the expression
    # itself filters on the threshold, so only matching pods are returned
    ProURL='http://127.0.0.1:9090'
    Proformula='sum(irate(container_cpu_usage_seconds_total{namespace=~"%s",image!=""}[5m])*100)by(namespace,pod)/sum(container_spec_cpu_quota{namespace=~"%s",image!=""}/container_spec_cpu_period{namespace=~"%s",image!=""})by(namespace,pod) > %s' %(namespace,namespace,namespace,cpu_threshold)
    # print(Proformula)
    # print(get_cpu_usage(url=ProURL,formula=Proformula))

    # nacos settings (the same value is used for groupName and namespaceId)
    name_namspace='test'
    nacos_user="nacos"
    nacos_pass="nacos"

    # Endpoint used to change an instance, and endpoint used to list instances
    nacos_url="http://nacos.test.com.cn:8848/nacos/v1/ns/instance"
    nacos_list_url="http://nacos.test.com.cn:8848/nacos/v1/ns/catalog/instances"

    # Parameter template for instance updates; serviceName, ip, port and
    # enabled are filled in by nacos_down / nacos_up
    nacos_info = {
        "serviceName": "",
        "groupName": name_namspace,
        "namespaceId": name_namspace,
        "ip": "",
        "port": "",
        "clusterName": "DEFAULT",
        # "true" brings the instance online, "false" takes it offline
        "enabled": "false",
        "username": nacos_user,
        "password": nacos_pass,
    }
    # Parameter template for instance listings; serviceName is filled in by nacos_list
    nacos_list_info={
        "pageSize":"10",
        "pageNo":"1",
        "serviceName":"",
        "groupName": name_namspace,
        "namespaceId": name_namspace,
        "clusterName": "DEFAULT",
        "username": nacos_user,
        "password": nacos_pass,
    }

    # Feishu bot webhook
    webhook_url = "https://open.feishu.cn/open-apis/bot/v2/hook/xxxxxxxxxx"

    ############################

    # Pods reported by the query on two consecutive samples
    mon_CPU=monitor_cpu(ProURL, Proformula,Judge)

    # Same entries with the pod name replaced by the pod IP
    nacos_information=nacos_data_list(K8s_host=host,K8s_token=token,K8s_ns=namespace,mon_CPU=mon_CPU)
    # print('mon_CPU',mon_CPU)
    time.sleep(1)

    # Take the busy instances offline
    nacos_down(NA_U=nacos_url,NA_I=nacos_info,NA_IF=nacos_information)

    # List the monitored services, then re-enable disabled instances that
    # are not in the offline list built above
    nacos_inquire = nacos_list(NA_U=nacos_list_url,NA_I=nacos_list_info,NA_IF=Judge)
    # print(nacos_inquire)
    nacos_up(NA_U=nacos_url,NA_I=nacos_info,NA_IF=nacos_information,NA_IQ=nacos_inquire)
    # NA_IQ = instance information queried from nacos
    # NA_IF = information needed for the nacos operation
    # NA_I  = parameters used to modify nacos
    # NA_U  = nacos_url
    time.sleep(1)
    # Query again after the changes and alert on services that still have
    # disabled instances
    nacos_inquire_two = nacos_list(NA_U=nacos_list_url,NA_I=nacos_list_info,NA_IF=Judge)
    nacos_feishu(NA_IQ=nacos_inquire_two,WU=webhook_url)

你可能感兴趣的:(kubernetes,安全)