博主 "开着拖拉机回家"带您 Go to New World.✨
个人主页——开着拖拉机回家_大数据运维-CSDN博客 ✨
希望本文能够给您带来一定的帮助。文章粗浅,敬请批评指正!
感谢点赞和关注 ,每天进步一点点!加油!
目录
一、概述
二、集群版本信息
三、组件状态信息获取
四、DataNode 启动
五、Python 实现 Rest API获取组件状态并告警
Ambari 借鉴了很多成熟分布式软件的 API 设计。Rest API 就是一个很好的体现。通过 Ambari 的 Rest API,可以在脚本中通过 curl 维护整个集群。并且,我们可以用 Rest API 实现一些无法在 Ambari GUI 上面做的操作。
curl -u admin:admin -i -H X-Requested-By:ambari -XGET http://192.168.2.153:8080/api/v1/clusters/winner/hosts/hdp106/host_components/NODEMANAGER
curl -u admin:admin -i -H X-Requested-By:ambari -XGET http://192.168.2.153:8080/api/v1/clusters/winner/services/HIVE
curl -u admin:admin -i -H X-Requested-By:ambari -XGET http://192.168.2.153:8080/api/v1/clusters/winner/services/HIVE
curl -u admin:admin -i -H X-Requested-By:ambari -XGET http://192.168.2.153:8080/api/v1/clusters/winner/services/TEZ
curl -u admin:admin -i -H X-Requested-By:ambari -XGET http://192.168.2.153:8080/api/v1/clusters/winner/services/HBASE
curl -u admin:admin -i -H X-Requested-By:ambari -XGET http://192.168.2.153:8080/api/v1/clusters/winner/services/HDFS
curl -u admin:admin -i -H X-Requested-By:ambari -XGET http://192.168.2.153:8080/api/v1/clusters/winner/services/ZOOKEEPER
获取hdp106服务器上NODEMANAGER 的状态信息
[winner_spark@hdp105 root]$ curl -u admin:admin -i -H X-Requested-By:ambari -XGET http://192.168.2.153:8080/api/v1/clusters/winner/hosts/hdp106/host_components/NODEMANAGER
HTTP/1.1 200 OK
Date: Tue, 29 Aug 2023 06:15:38 GMT
X-Frame-Options: DENY
X-XSS-Protection: 1; mode=block
X-Content-Type-Options: nosniff
Cache-Control: no-store
Pragma: no-cache
Set-Cookie: AMBARISESSIONID=node0146ihmo69ytgk12k48wrpwrt0v5.node0;Path=/;HttpOnly
Expires: Thu, 01 Jan 1970 00:00:00 GMT
User: admin
Content-Type: text/plain;charset=utf-8
X-Content-Type-Options: nosniff
Vary: Accept-Encoding, User-Agent
Transfer-Encoding: chunked
{
"href" : "http://192.168.2.153:8080/api/v1/clusters/winner/hosts/hdp106/host_components/NODEMANAGER",
"HostRoles" : {
"cluster_name" : "winner",
"component_name" : "NODEMANAGER",
"desired_admin_state" : "INSERVICE",
"desired_repository_version" : "3.1.4.0-315",
"desired_stack_id" : "HDP-3.1",
"desired_state" : "STARTED",
"display_name" : "NodeManager",
"host_name" : "hdp106",
"maintenance_state" : "OFF",
"public_host_name" : "hdp106",
"reload_configs" : false,
"service_name" : "YARN",
"stale_configs" : false,
"state" : "STARTED",
"upgrade_state" : "NONE",
"version" : "3.1.4.0-315",
"actual_configs" : { }
},
"host" : {
"href" : "http://192.168.2.153:8080/api/v1/clusters/winner/hosts/hdp106"
},
"component" : [
{
"href" : "http://192.168.2.153:8080/api/v1/clusters/winner/services/YARN/components/NODEMANAGER",
"ServiceComponentInfo" : {
"cluster_name" : "winner",
"component_name" : "NODEMANAGER",
"service_name" : "YARN"
}
}
],
"processes" : [ ]
}
curl -u admin:admin -i -H 'X-Requested-By:ambari' -X PUT -d '{"RequestInfo":{"context":"Start DATANODE via REST"},"Body" : {"ServiceInfo" : {"state":"STARTED"}}}' http://192.168.2.153:8080/api/v1/clusters/winner/services/HDFS
DataNode 启动请求返回 Accepted,表示请求已被 Ambari 接受。
Ambari 页面的操作记录中显示 "Start DATANODE via REST",表示我们的请求执行成功。
import time
import requests
import json
"""
~~~~~~~~~~~~
author: kangll
date: 2023/8/25 17:22
desc: Ambari rest api 获取组件告警信息
-- curl 请求,如下为测试链接
curl -u admin:admin -i -H X-Requested-By:ambari -XGET http://192.168.2.153:8080/api/v1/clusters/winner/hosts/winner
-- datanode 启动
curl -u admin:admin -i -H 'X-Requested-By:ambari' -X PUT -d '{"RequestInfo":{"context":"Start DATANODE via REST"}
,"Body" : {"ServiceInfo" : {"state":"STARTED"}}}' http://192.168.2.153:8080/api/v1/clusters/winner/services/HDFS
"""
__author__ = 'kanglilong'

# IP address of the Ambari server; it is also reported in the alert text.
ambari_server_ip = "192.168.2.153"
# Base URL of the Ambari REST API host collection. It is built from
# ambari_server_ip so the queried server and the address shown in alerts
# cannot drift apart.
control_url = "http://{}:8080/api/v1/clusters/winner/hosts".format(
    ambari_server_ip)
# Ambari web login account, sent as HTTP basic auth.
# NOTE(review): credentials are hard-coded in source - consider loading them
# from the environment or a secrets store.
AUTH = ("admin", "admin")
# Headers sent with the DingTalk webhook POST.
headers = {'Content-Type': 'application/json;charset=utf-8'}
# DingTalk robot webhook URL.
# NOTE(review): the access token below is a secret committed in source -
# consider rotating it and reading it from configuration instead.
api_url = "https://oapi.dingtalk.com/robot/send?access_token=f4e0f344306ce9b6eec60bec95d5aa7c57f4264a791458dc09121dd7e948ac64"
# Name of the host whose components are checked.
hostname = "hdp105"
def getComponentStatus(host, component, timeout=10):
    """Return the state Ambari reports for one component on one host.

    Sends a GET to ``<control_url>/<host>/host_components/<component>`` with
    the module-level ``AUTH`` credentials and reads ``HostRoles.state`` from
    the JSON body.

    :param host: host name
    :param component: component name, e.g. ``NODEMANAGER``
    :param timeout: seconds to wait for the Ambari server; without a timeout
        ``requests`` would block indefinitely on an unresponsive server
    :return: the state string (e.g. ``STARTED`` or ``INSTALLED``), or
        ``None`` when the request fails, the status code is not 200, or the
        body does not have the expected shape
    """
    get_component_status_url = control_url + "/{}/host_components/{}".format(
        host, component)
    try:
        rep = requests.get(get_component_status_url, auth=AUTH,
                           timeout=timeout)
        if rep.status_code != 200:
            print("获取组件状态返回异常")
            return None
        return json.loads(rep.text)['HostRoles']['state']
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # Best effort: report network errors, invalid JSON, or an unexpected
        # body shape, and let the caller treat the state as unknown.
        print(e)
        return None
def getHostComponentsStatus(host, timeout=10):
    """Collect the components on *host* that are installed but not started.

    Lists the host's components via ``<control_url>/<host>/host_components``,
    skips the excluded component names, and asks ``getComponentStatus`` for
    the state of each remaining component.

    :param host: host name
    :param timeout: seconds to wait for the listing request; without a
        timeout ``requests`` would block indefinitely on an unresponsive
        server
    :return: dict mapping component name to ``"INSTALLED"`` for every checked
        component in that state, e.g. ``{'DATANODE': 'INSTALLED'}``; empty
        when no component is in that state or when the listing failed
    """
    # Client roles and always-on client-style components (SQOOP, INFRA_SOLR)
    # are never started, so their state is not checked.
    excluded_markers = ("CLIENT", "SQOOP", "INFRA_SOLR")
    component_dict = {}
    get_host_components_status_url = (
        control_url + "/{}/host_components".format(host))
    try:
        rep = requests.get(get_host_components_status_url, auth=AUTH,
                           timeout=timeout)
        print(rep.status_code)
        # Any 20x status code counts as a successful listing.
        if not str(rep.status_code).startswith("20"):
            print("没有正常获取到状态")
            return component_dict
        for itemJson in json.loads(rep.text)['items']:
            item = itemJson['HostRoles']['component_name']
            if any(marker in item for marker in excluded_markers):
                continue
            component_status = getComponentStatus(host, item)
            # INSTALLED means installed but not started; such a component is
            # treated as stopped and must trigger an alert.
            if component_status == "INSTALLED":
                component_dict[item] = component_status
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # Best effort: report the problem and return whatever was collected.
        print(e)
    return component_dict
def msg(text, api_url, timeout=10):
    """Send a plain-text alert to a DingTalk robot webhook.

    :param text: alert text, placed in the message's ``content`` field
    :param api_url: DingTalk robot webhook URL
    :param timeout: seconds to wait for the webhook; without a timeout
        ``requests`` would block indefinitely on an unresponsive endpoint
    :return: raw response body (bytes) so callers can inspect the result
    """
    json_text = {
        "msgtype": "text",
        "text": {
            "content": text
        },
        "at": {
            "atMobiles": ["1786881xxxx"]
        }
    }
    response = requests.post(api_url, data=json.dumps(json_text),
                             headers=headers, timeout=timeout)
    return response.content
# Query the stopped components once, then send one DingTalk alert per
# component.
component_dict = getHostComponentsStatus(hostname)
compo_dict_len = len(component_dict)
for component_name in component_dict:
    # With many pending alerts, pause before each one so DingTalk does not
    # throttle the robot.
    if compo_dict_len > 50:
        time.sleep(30)
    formatted_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    text = "".join([
        "告警对象:IP:", ambari_server_ip,
        ' 主机名:', hostname,
        ' \n组件名称:', component_name,
        " \n告警内容:HDP 集群组件 ", component_name, " 停止运行",
        "\n告警时间:", formatted_time,
    ])
    time.sleep(2)  # send alerts at a steady pace
    msg(text, api_url)
钉钉告警发送成功: