# Finally, check whether any instance is offline; if so, send a Feishu alert.
from kubernetes import client, config
import requests,json
import time
import urllib3
urllib3.disable_warnings()
# 1. 连接k8s获取pod的IP
def k8s(host, token, namespace, pod_name):
    """Return the IP address of a pod, looked up through the Kubernetes API.

    Args:
        host: Base URL of the Kubernetes API server.
        token: Bearer token sent in the ``authorization`` API key.
        namespace: Namespace that contains the pod.
        pod_name: Name of the pod to read.

    Returns:
        The value of ``pod.status.pod_ip`` reported by the API server.
        NOTE(review): presumably ``None`` while the pod has no IP yet - confirm.
    """
    # Named ``configuration`` so the imported ``kubernetes.config`` module is
    # not shadowed inside this function.
    configuration = client.Configuration()
    configuration.host = host
    # SECURITY: TLS certificate verification is disabled for this connection.
    configuration.verify_ssl = False
    configuration.debug = False
    configuration.api_key = {'authorization': "Bearer " + token}
    # Kept from the original: registers this configuration as the default
    # before the API object is created.
    client.Configuration.set_default(configuration)
    api = client.CoreV1Api()
    pod = api.read_namespaced_pod(pod_name, namespace)
    return pod.status.pod_ip
# print("Pod IP address:", pod_ip)
# 2. 连接prometheus,获取k8s的pod的cpu的大小
def get_cpu_usage(url, formula, timeout=10):
    """Run an instant PromQL query and map each pod to its rounded value.

    Args:
        url: Base URL of the Prometheus server (``/api/v1/query`` is appended).
        formula: PromQL expression. Every result series must carry a ``pod``
            label, otherwise a ``KeyError`` is raised.
        timeout: Seconds to wait for Prometheus before giving up.

    Returns:
        Dict mapping pod name to the sample value rounded to an integer.
        If two series share a pod name, the later one wins.

    Raises:
        requests.HTTPError: If Prometheus answers with an error status.
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    # Pass the expression through ``params`` so requests URL-encodes it;
    # splicing it into the URL breaks on characters such as ``+``, ``&`` or ``#``.
    response = requests.get(
        f"{url}/api/v1/query",
        params={"query": formula},
        timeout=timeout,
    )
    response.raise_for_status()
    series = response.json()["data"]["result"]
    # Each sample is ``[timestamp, "value"]``; index 1 is the value string.
    return {
        sample["metric"]["pod"]: round(float(sample["value"][1]))
        for sample in series
    }
# 3. 连接nacos,可以让服务实现上线下线操作
def nacos_up_down(url, query_params, timeout=10):
    """Send a PUT request that updates one Nacos instance.

    Args:
        url: Endpoint to call (the callers in this file pass the Nacos
            instance API URL).
        query_params: Dict sent as the query string; the callers set
            ``enabled`` to ``'true'`` or ``'false'`` to bring an instance
            online or offline.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive server cannot hang the script forever.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    response = requests.put(url, params=query_params, timeout=timeout)
    response.raise_for_status()
# response = requests.get(url,params=query_params).url
# print('上下线URL',response)
# 4.飞书发送告警
def send_feishu_message(url, message, timeout=10):
    """Post a plain-text message to a Feishu bot webhook.

    Args:
        url: Webhook URL of the Feishu bot.
        message: Text to send.
        timeout: Seconds to wait for the webhook before giving up, so a
            stalled alert cannot hang the script forever.

    Returns:
        The raw response body as text. The HTTP status is not checked, so an
        error reply is returned to the caller rather than raised.

    Raises:
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    payload = {
        "msg_type": "text",
        "content": {
            "text": message,
        },
    }
    headers = {
        'Content-Type': 'application/json;charset=utf-8',
    }
    response = requests.post(
        url,
        headers=headers,
        data=json.dumps(payload),
        timeout=timeout,
    )
    return response.text
# 4. Sleep 10 seconds, then check the monitoring data a second time
def monitor_cpu(api_server, expr, Judge, recheck_delay=10):
    """Return the watched pods that show up in two consecutive CPU queries.

    The query is run once, the matching pods are remembered, and after
    ``recheck_delay`` seconds the query is run again. Only pods present in
    both samples are returned. The sample values themselves are never
    compared here, so any thresholding must be part of ``expr``.

    Args:
        api_server: Base URL of the Prometheus server.
        expr: PromQL expression passed to ``get_cpu_usage``.
        Judge: List of dicts shaped
            ``{deployment_name: [nacos_service_name, nacos_port]}``.
        recheck_delay: Seconds to wait between the two queries.

    Returns:
        List of ``[nacos_service_name, nacos_port, pod_name]`` entries,
        ordered by the pods of the second query.
    """
    first_sample = get_cpu_usage(api_server, expr)

    # pod name -> entries created for it, in Judge order.
    candidates = {}
    for pod in first_sample:
        for judge_data in Judge:
            for k8s_deploy, nacos_data in judge_data.items():
                # NOTE(review): substring match; a deployment name contained
                # in another deployment's pod name also matches.
                if k8s_deploy in pod:
                    candidates.setdefault(pod, []).append(
                        [nacos_data[0], nacos_data[1], pod]
                    )

    # Nothing to confirm: skip the wait and the second query entirely.
    if not candidates:
        return []

    time.sleep(recheck_delay)
    second_sample = get_cpu_usage(api_server, expr)

    confirmed = []
    for pod in second_sample:
        confirmed.extend(candidates.get(pod, []))
    return confirmed
# 5.nacos操作所需信息
def nacos_data_list(K8s_host, K8s_token, K8s_ns, mon_CPU):
    """Replace the pod name in each entry with the pod's IP address.

    Args:
        K8s_host: Base URL of the Kubernetes API server.
        K8s_token: Bearer token for the Kubernetes API.
        K8s_ns: Namespace the pods live in.
        mon_CPU: Iterable of ``[nacos_service_name, nacos_port, pod_name]``
            entries, as returned by ``monitor_cpu``.

    Returns:
        List of ``[nacos_service_name, nacos_port, pod_ip]`` entries. Pods for
        which no IP is reported are left out.
    """
    data_list = []
    for entry in mon_CPU:
        pod_ip = k8s(
            host=K8s_host,
            token=K8s_token,
            namespace=K8s_ns,
            pod_name=entry[2],
        )
        # Without an IP the Nacos request cannot address the instance, so the
        # entry is skipped instead of being forwarded with an empty address.
        if not pod_ip:
            continue
        data_list.append([entry[0], entry[1], pod_ip])
    return data_list
# 6.nacos下线操作
def nacos_down(NA_U, NA_I, NA_IF):
    """Take every listed instance offline in Nacos.

    Args:
        NA_U: URL passed to ``nacos_up_down``.
        NA_I: Template dict of request parameters. It is copied for each
            request and is not modified.
        NA_IF: Iterable of ``[nacos_service_name, nacos_port, ip]`` entries to
            disable.
    """
    for target in NA_IF:
        # Work on a copy so the caller's template is not left holding the
        # values of the last processed instance.
        params = dict(NA_I)
        params['enabled'] = 'false'
        params['serviceName'] = target[0]
        params['ip'] = target[2]
        params['port'] = target[1]
        nacos_up_down(NA_U, params)
# 7.nacos信息查询
def nacos_list(NA_U, NA_I, NA_IF, timeout=10):
    """Fetch the instance listing of every configured Nacos service.

    Args:
        NA_U: URL of the listing endpoint.
        NA_I: Template dict of query parameters. It is copied for each
            request and is not modified.
        NA_IF: List of dicts shaped
            ``{deployment_name: [nacos_service_name, nacos_port]}``.
        timeout: Seconds to wait for each response before giving up.

    Returns:
        Dict mapping Nacos service name to the decoded JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    nacos_dict = {}
    for nacos_server in NA_IF:
        for nacos_data in nacos_server.values():
            service_name = nacos_data[0]
            params = dict(NA_I)
            params['serviceName'] = service_name
            response = requests.get(NA_U, params=params, timeout=timeout)
            # Fail here on an HTTP error instead of handing an error payload
            # to the code that later reads the 'list' and 'count' keys.
            response.raise_for_status()
            nacos_dict[service_name] = response.json()
    return nacos_dict
# 8.nacos上线操作
def nacos_up(NA_IQ, NA_IF, NA_I, NA_U):
    """Bring disabled instances back online unless they are still overloaded.

    Every instance whose ``enabled`` field is ``False`` and whose IP is not
    among the entries of ``NA_IF`` is re-enabled. No other condition is
    checked, so this also covers instances that were disabled by something
    other than this script.

    Args:
        NA_IQ: Dict of service name -> listing response; each value must have
            a ``'list'`` of instance dicts.
        NA_IF: Iterable of ``[nacos_service_name, nacos_port, ip]`` entries
            that must stay offline.
        NA_I: Template dict of request parameters. It is copied for each
            request and is not modified.
        NA_U: URL passed to ``nacos_up_down``.
    """
    # Set lookup instead of scanning a list for every instance.
    keep_offline = {entry[2] for entry in NA_IF}

    for service in NA_IQ.values():
        for instance in service['list']:
            if instance.get("enabled") is not False:
                continue
            if instance.get("ip") in keep_offline:
                continue
            params = dict(NA_I)
            params['enabled'] = 'true'
            # Listing names look like "<group>@@<service>"; keep the service
            # part, and tolerate a name that has no group prefix.
            params['serviceName'] = instance.get("serviceName").split('@@', 1)[-1]
            params['ip'] = instance.get("ip")
            params['port'] = instance.get("port")
            nacos_up_down(NA_U, params)
# 9.nacos查询信息,飞书告警
def nacos_feishu(NA_IQ, WU):
    """Send a Feishu alert for every service that has offline instances.

    Args:
        NA_IQ: Dict of service name -> listing response; each value must have
            a ``'list'`` of instance dicts, and a ``'count'`` when an alert is
            sent.
        WU: Feishu webhook URL passed to ``send_feishu_message``.
    """
    for service in NA_IQ.values():
        instances = service["list"]
        up = sum(1 for instance in instances if instance.get("enabled") is True)
        down = len(instances) - up

        # Only build the message when there is something to report. This also
        # covers an empty instance list, where no service name is available.
        if down <= 0:
            continue

        count = service['count']
        # Listing names look like "<group>@@<service>"; the last instance's
        # name is used for the alert.
        group, _, server_name = instances[-1].get("serviceName").partition('@@')
        message = f"\n环境:{group}\n服务名:{server_name}\n总实例:{count}\n上线:{up}\n下线:{down}"
        send_feishu_message(WU, message)
def main():
    """Run one cycle: disable overloaded pods in Nacos, restore the rest, alert."""
    # --- Shared settings ---
    namespace = "test"
    # CPU usage threshold (percent) embedded in the PromQL expression.
    cpu_threshold = 85
    # Format: [{deployment name in k8s: [Nacos service name, Nacos service port]}]
    judge = [
        {"aaaaaaaa-test-server": ['a-test', '8080']},
        {'bbbbbbbb-test-server': ['b-test', '8181']},
    ]

    # --- Kubernetes ---
    k8s_host = 'https://127.0.0.1:6443'
    k8s_token = 'xxxxxxxxxx'

    # --- Prometheus ---
    prometheus_url = 'http://127.0.0.1:9090'
    promql_template = 'sum(irate(container_cpu_usage_seconds_total{namespace=~"%s",image!=""}[5m])*100)by(namespace,pod)/sum(container_spec_cpu_quota{namespace=~"%s",image!=""}/container_spec_cpu_period{namespace=~"%s",image!=""})by(namespace,pod) > %s'
    promql = promql_template % (namespace, namespace, namespace, cpu_threshold)

    # --- Nacos ---
    nacos_namespace = 'test'
    nacos_user = "nacos"
    nacos_pass = "nacos"
    nacos_url = "http://nacos.test.com.cn:8848/nacos/v1/ns/instance"
    nacos_list_url = "http://nacos.test.com.cn:8848/nacos/v1/ns/catalog/instances"
    # Template for instance updates; "enabled" is "true" for online and
    # "false" for offline.
    nacos_info = {
        "serviceName": "",
        "groupName": nacos_namespace,
        "namespaceId": nacos_namespace,
        "ip": "",
        "port": "",
        "clusterName": "DEFAULT",
        "enabled": "false",
        "username": nacos_user,
        "password": nacos_pass,
    }
    # Template for instance listing queries.
    nacos_list_info = {
        "pageSize": "10",
        "pageNo": "1",
        "serviceName": "",
        "groupName": nacos_namespace,
        "namespaceId": nacos_namespace,
        "clusterName": "DEFAULT",
        "username": nacos_user,
        "password": nacos_pass,
    }

    # --- Feishu bot ---
    webhook_url = "https://open.feishu.cn/open-apis/bot/v2/hook/xxxxxxxxxx"

    # Keyword legend for the Nacos helpers:
    #   NA_IQ = listing data queried from Nacos
    #   NA_IF = instances to operate on
    #   NA_I  = request parameter template
    #   NA_U  = Nacos URL

    # Step 1: find pods reported by both CPU samples and resolve their IPs.
    hot_pods = monitor_cpu(prometheus_url, promql, judge)
    hot_instances = nacos_data_list(
        K8s_host=k8s_host,
        K8s_token=k8s_token,
        K8s_ns=namespace,
        mon_CPU=hot_pods,
    )
    time.sleep(1)

    # Step 2: take those instances offline.
    nacos_down(NA_U=nacos_url, NA_I=nacos_info, NA_IF=hot_instances)

    # Step 3: re-enable disabled instances that are not in the hot list.
    listing_before = nacos_list(
        NA_U=nacos_list_url,
        NA_I=nacos_list_info,
        NA_IF=judge,
    )
    nacos_up(
        NA_U=nacos_url,
        NA_I=nacos_info,
        NA_IF=hot_instances,
        NA_IQ=listing_before,
    )
    time.sleep(1)

    # Step 4: query again and alert on services that still have offline instances.
    listing_after = nacos_list(
        NA_U=nacos_list_url,
        NA_I=nacos_list_info,
        NA_IF=judge,
    )
    nacos_feishu(NA_IQ=listing_after, WU=webhook_url)


if __name__ == "__main__":
    main()