本文通过 Python 代码调用 YARN RESTful API,将各队列的 vcore 和 memory 定时采集并存储到 TiDB,然后使用 Apache Zeppelin 通过 JDBC 读取数据,简单绘制 vcore 和 memory 使用率的趋势图。这样可以观察集群 vcore 和 memory 的分配情况,防止其中一项成为集群或者队列的性能瓶颈。
import json
import urllib2
import datetime
# yarn rest api:
# http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html#Cluster_Writeable_APIs
def _fetchHaState(master):
    """Return the haState reported by one ResourceManager, or None if it cannot be read."""
    try:
        # A timeout keeps a hung ResourceManager from blocking the whole collection run.
        response = urllib2.urlopen("http://" + master + "/ws/v1/cluster/info", timeout=10)
        try:
            jsonstring = response.read()
        finally:
            response.close()
        print(jsonstring)
        state = json.loads(jsonstring)['clusterInfo']['haState']
    except (IOError, ValueError, KeyError) as e:
        # IOError covers urllib2.URLError and socket errors; ValueError/KeyError cover a
        # malformed or unexpected response body. One bad master must not hide the other.
        print(master + " resourcemanager is unavailable :" + str(e))
        return None
    print(master + " resourcemanager state is :" + state)
    return state


def getActiveRN(master1,master2):
    """Return the address of the ResourceManager whose haState is ACTIVE.

    Both masters are queried through /ws/v1/cluster/info. A master that cannot
    be reached, or whose response cannot be parsed, is treated as not active
    rather than aborting the lookup, so the surviving master is still found.

    Args:
        master1: "host:port" of the first ResourceManager; preferred when both report ACTIVE.
        master2: "host:port" of the second ResourceManager.

    Returns:
        master1 or master2, whichever reports haState == 'ACTIVE'.

    Raises:
        Exception: if neither master reports ACTIVE.
    """
    # Query both masters before deciding, so the state of each one is always logged.
    states = [(master, _fetchHaState(master)) for master in (master1, master2)]
    for master, state in states:
        if state == 'ACTIVE':
            print("active master is " + master)
            return master
    raise Exception("no active resourcemanager in %s,%s " % (master1, master2))
def getClusterScheduler(activeResourceManger):
    """Fetch and parse the scheduler document from the given ResourceManager.

    Args:
        activeResourceManger: "host:port" of the ResourceManager to query.

    Returns:
        The parsed JSON body of /ws/v1/cluster/scheduler.
    """
    # Use the address passed in rather than a module-level name, so the request
    # goes to the ResourceManager the caller selected.
    response=urllib2.urlopen("http://"+activeResourceManger+"/ws/v1/cluster/scheduler")
    try:
        jsonstring=response.read()
    finally:
        response.close()
    print(jsonstring)
    jsonarray=json.loads(jsonstring)
    print(jsonarray)
    return jsonarray
def getQueueInfo(queuename,ClusterScheduler):
    """Return the top-level queue entry named *queuename* from a scheduler document.

    Args:
        queuename: value compared against each entry's 'queueName'.
        ClusterScheduler: parsed scheduler document; the queue list is read from
            ['scheduler']['schedulerInfo']['queues']['queue'].

    Returns:
        The first queue dict whose 'queueName' equals queuename, or None when
        no entry matches.
    """
    jsonarray=ClusterScheduler['scheduler']['schedulerInfo']['queues']['queue']
    print("**************** %s scheduler info :%s"%(queuename,jsonarray))
    print("*********************************************************")
    # Debug dump of the second entry; guarded because the list may hold fewer than two queues.
    if len(jsonarray) > 1:
        print("**************** %s scheduler1 info :%s"%(queuename,jsonarray[1]))
    for queue in jsonarray:
        if queue['queueName']==queuename:
            print("find queuename:%s info %s"%(queuename,queue))
            return queue
    return None
def findSubQueueInfo(queuename,parenetClusterScheduler):
    """Look up a child queue by name inside a parent queue entry.

    Args:
        queuename: value compared against each child's 'queueName'.
        parenetClusterScheduler: parent queue dict; children are read from
            ['queues']['queue'].

    Returns:
        The first child dict whose 'queueName' equals queuename, or None when
        no child matches.
    """
    print("*********************begin findSubQueueInfo:%s**********"%queuename)
    children = parenetClusterScheduler['queues']['queue']
    found = next((child for child in children if child['queueName'] == queuename), None)
    if found is None:
        return None
    print("*********************finish findSubQueueInfo:%s**********" % queuename)
    return found
def clusteMetrics(activeResourceManger):
    """Fetch and parse the cluster metrics document from the given ResourceManager.

    Args:
        activeResourceManger: "host:port" of the ResourceManager to query.

    Returns:
        The parsed JSON body of /ws/v1/cluster/metrics.
    """
    metrics_url = "".join(("http://", activeResourceManger, "/ws/v1/cluster/metrics"))
    return json.loads(urllib2.urlopen(metrics_url).read())
def _usageInsertSql(now_time, queuename, vcores, memory, allvcore, allmemory):
    """Build one INSERT statement for yarn_monitor.yarn_vcore_memory_monitor.

    now_time and queuename must already be quoted SQL literals. The percentage
    columns are written as ratios (used / cluster total), not multiplied by 100.
    """
    # A cluster reporting zero capacity would otherwise raise ZeroDivisionError.
    vcore_ratio = vcores * 1.0 / allvcore if allvcore else 0.0
    memory_ratio = memory * 1.0 / allmemory if allmemory else 0.0
    return ("insert into yarn_monitor.yarn_vcore_memory_monitor"
            "(curr_time,queuename,currentAllvcore,currentAllmemory,"
            "currentAllvcorePercentage,currentAllmemoryPercentage) "
            "VALUES (%s,%s,%s,%s,%s,%s);\n" % (
                now_time, queuename, vcores, memory, vcore_ratio, memory_ratio))


if __name__ == "__main__":
    # Collect one sample of vcore/memory usage and append it to QueueInfo.txt as
    # INSERT statements: one row per monitored queue plus one 'root' total row.
    master1="192.168.240.1:8088"
    master2="192.168.240.2:8088"
    parenetQueue=['sto','dm','bd','event']
    bdchildrenQueue = ['vip', 'tenhive', 'AthenaSysService', 'default']

    activemaster=getActiveRN(master1,master2)
    clustemetrics=clusteMetrics(activemaster)
    allvcore = clustemetrics['clusterMetrics']['totalVirtualCores']
    allmemory = clustemetrics['clusterMetrics']['totalMB']
    clusterScheduler=getClusterScheduler(activemaster)
    bd=getQueueInfo('bd',clusterScheduler)

    now_time = "'" + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + "'"
    statements = []
    currentAllvcore=0
    currentAllmemory=0

    # Each row carries that queue's own usage; the running total feeds only the 'root' row.
    for i in parenetQueue:
        queueInfo = getQueueInfo(i, clusterScheduler)
        if queueInfo is None:
            print("queue root.%s not found in scheduler info, skipped" % i)
            continue
        used = queueInfo['resourcesUsed']
        currentAllvcore = currentAllvcore + used['vCores']
        currentAllmemory = currentAllmemory + used['memory']
        statements.append(_usageInsertSql(
            now_time, "'root." + i + "'", used['vCores'], used['memory'], allvcore, allmemory))

    # Children of 'bd' get their own rows but are not added to the total: 'bd' itself
    # was already counted above, so adding them again would double count.
    # NOTE(review): assumes a parent queue's resourcesUsed covers its children - confirm
    # against the scheduler in use.
    for i in bdchildrenQueue:
        queueInfo = findSubQueueInfo(i, bd) if bd is not None else None
        if queueInfo is None:
            print("queue root.bd.%s not found in scheduler info, skipped" % i)
            continue
        used = queueInfo['resourcesUsed']
        statements.append(_usageInsertSql(
            now_time, "'root.bd." + i + "'", used['vCores'], used['memory'], allvcore, allmemory))

    currentAllvcorePercentage = currentAllvcore * 1.0 / allvcore if allvcore else 0.0
    currentAllmemoryPercentage = currentAllmemory * 1.0 / allmemory if allmemory else 0.0
    print("currentAllvcore = %s"%currentAllvcore)
    print("currentAllallmemory = %s"%currentAllmemory)
    print("currentAllvcorePercentage = %s"%currentAllvcorePercentage)
    print("currentAllallmemoryPercentage = %s"%currentAllmemoryPercentage)
    statements.append(_usageInsertSql(
        now_time, "'root'", currentAllvcore, currentAllmemory, allvcore, allmemory))

    # Open the file only once all rows are built, and let the context manager close it
    # even if a write fails.
    with open("QueueInfo.txt", "a+") as fo:
        fo.writelines(statements)
创建 TiDB 表并设置 Zeppelin JDBC 连接参数:
-- Storage table for the samples produced by the collector script.
-- The script's INSERT statements target yarn_monitor.yarn_vcore_memory_monitor,
-- so run this with yarn_monitor as the current database.
CREATE TABLE `yarn_vcore_memory_monitor` (
-- Surrogate key; numbering starts at 30001 (see AUTO_INCREMENT below).
`id` int(11) NOT NULL AUTO_INCREMENT,
-- Sample timestamp, written by the collector as 'YYYY-MM-DD HH:MM:SS'.
`curr_time` datetime NOT NULL,
-- Queue path as written by the collector: 'root', 'root.<queue>' or 'root.bd.<queue>'.
`queuename` varchar(50) DEFAULT NULL,
-- vcore usage figure written by the collector (derived from YARN resourcesUsed.vCores).
`currentAllvcore` int(11) DEFAULT NULL,
-- Memory usage figure written by the collector (derived from YARN resourcesUsed.memory).
`currentAllmemory` int(11) DEFAULT NULL,
-- vcore usage as a ratio of the cluster's totalVirtualCores (not multiplied by 100).
`currentAllvcorePercentage` double DEFAULT NULL,
-- Memory usage as a ratio of the cluster's totalMB (not multiplied by 100).
`currentAllmemoryPercentage` double DEFAULT NULL,
PRIMARY KEY (`id`)
-- NOTE(review): ENGINE=InnoDB is MySQL syntax; presumably accepted by TiDB for compatibility - confirm.
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin AUTO_INCREMENT=30001;
3、在 Zeppelin 中查看数据
-- Today's samples for the usage trend chart.
-- The range is half-open, [today 00:00:00, tomorrow 00:00:00), and compares datetime
-- values directly instead of formatting them to strings first.
-- curr_time and queuename are returned so the chart can use time as the axis and
-- draw one series per queue instead of mixing all queues together.
SELECT id, curr_time, queuename, currentAllvcorePercentage, currentAllmemoryPercentage
FROM yarn_monitor.yarn_vcore_memory_monitor
WHERE curr_time >= CURDATE()
AND curr_time < CURDATE() + INTERVAL 1 DAY
ORDER BY curr_time, id;
观察下面vcore和memory使用百分比趋势图,如果其中一项成为性能瓶颈,需要增加相应队列的资源。