当然思路和脚本参考了网上的,但网上的那些有好多错误,以下为本人经过更改调试后的。
如有疑问可以联系我 QQ: 279379936,一起改进优化。
Centos6下安装easy_install
# yum install python-setuptools
安装python 的docker模块
# easy_install docker-py
sudo:sorry, you must have a tty to run sudo
使用不同账户执行脚本的时候,sudo 经常会碰到 sudo: sorry, you must have a tty to run sudo 这个情况,其实修改一下 sudo 的配置就好了
# vim /etc/sudoers (最好用visudo命令)
注释掉 Defaults requiretty 一行
#Defaults requiretty
意思就是sudo默认需要tty终端。注释掉就可以在后台执行了。
Zabbix客户端的部署:
#vim /opt/zabbix/etc/zabbix_agentd.conf
#docker
UserParameter=docker_discovery[*],cat /opt/zabbix/script/docker_cons.txt //用来发现宿主机上存活的容器
UserParameter=docker_stats[*],/opt/zabbix/script/zabbix_monitor_docker.py $1 $2 //用来监控容器的各种指标,后面的脚本会具体体现,看不懂脚本的请路过。
UserParameter=docker.tomcat.discovery,cat /opt/zabbix/script/docker_tomcat.txt //用来发现容器启动的tomcat服务
UserParameter=docker.tomcat.stats[*],/opt/zabbix/script/zabbix_monitor_docker.py $1 $2 $3 //用来监控容器中tomcat的端口
UserParameter=docker.nginx.discovery,cat /opt/zabbix/script/docker_nginx.txt //用来发现容器启动的nginx服务
UserParameter=docker.nginx.stats[*],/opt/zabbix/script/zabbix_monitor_docker.py $1 $2 $3 //用来监控容器中nginx的端口
监控脚本1,用来监控容器的CPU 内存 网卡,服务端口
#cat /opt/zabbix/script/zabbix_monitor_docker.py
#!/usr/bin/envpython
#-*- coding:utf-8 -*-
#email:[email protected]
from dockerimport Client
import sys
import subprocess
import os
import time
import commands
def _read_network_counters(container_name, interface):
    """Return the cumulative ``(rx_bytes, tx_bytes)`` of *interface*.

    Runs ``ifconfig`` inside the container and picks the two numbers out of
    the ``RX bytes:N (...)  TX bytes:M (...)`` line.
    """
    # NOTE(review): both values are interpolated into a shell command line.
    # They originate from Zabbix item-key parameters, so only trusted names
    # should ever reach this script.
    command = (
        "docker exec %s ifconfig %s|grep bytes"
        "|awk -F ':' '{print $2,$3}'|awk '{print $1,$6}'"
    ) % (container_name, interface)
    pipe = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
    fields = pipe.communicate()[0].split()
    if len(fields) < 2:
        raise ValueError(
            'no byte counters found for %s in container %s'
            % (interface, container_name))
    return int(fields[0]), int(fields[1])


def check_container_stats(container_name, collect_item, interface='eth1'):
    """Return one metric of a container, sampled through docker-py.

    Args:
        container_name: name (or id) of the container to inspect.
        collect_item: one of ``cpu_total_usage``, ``cpu_system_usage``,
            ``cpu_percent``, ``mem_usage``, ``mem_limit``, ``mem_percent``,
            ``network_rx_bytes``, ``network_tx_bytes``.
        interface: interface inside the container used for the network
            items (defaults to ``eth1``, the value that was hard-coded).

    Returns:
        The metric value: counter deltas between two samples for the CPU
        and network items, absolute bytes for the memory items, and a
        percentage rounded to two decimals for the ``*_percent`` items.

    Raises:
        ValueError: if *collect_item* is not a supported item, or the
            network counters cannot be read.
    """
    import json  # local import: the script header does not import json

    cpu_items = ('cpu_total_usage', 'cpu_system_usage', 'cpu_percent')
    mem_items = ('mem_usage', 'mem_limit', 'mem_percent')
    net_items = ('network_rx_bytes', 'network_tx_bytes')
    if collect_item not in cpu_items + mem_items + net_items:
        # Previously this fell through and died with UnboundLocalError.
        raise ValueError('unsupported collect item: %s' % collect_item)

    if collect_item in net_items:
        # Bytes transferred during a one-second window.
        index = 0 if collect_item == 'network_rx_bytes' else 1
        before = _read_network_counters(container_name, interface)
        time.sleep(1)
        after = _read_network_counters(container_name, interface)
        return after[index] - before[index]

    # docker_client is the module-level client created in the __main__ block.
    container_collect = docker_client.stats(container_name)
    try:
        next(container_collect)  # the first sample is discarded
        # Parse as JSON rather than eval(): the payload comes from the daemon.
        old_result = json.loads(next(container_collect))
        new_result = json.loads(next(container_collect))
    finally:
        container_collect.close()

    if collect_item in mem_items:
        mem_usage = new_result['memory_stats']['usage']
        mem_limit = new_result['memory_stats']['limit']
        if collect_item == 'mem_usage':
            return mem_usage
        if collect_item == 'mem_limit':
            return mem_limit
        return round(float(mem_usage) / float(mem_limit) * 100.0, 2)

    old_cpu = old_result['cpu_stats']
    new_cpu = new_result['cpu_stats']
    cpu_total_usage = (new_cpu['cpu_usage']['total_usage']
                       - old_cpu['cpu_usage']['total_usage'])
    cpu_system_usage = new_cpu['system_cpu_usage'] - old_cpu['system_cpu_usage']
    if collect_item == 'cpu_total_usage':
        return cpu_total_usage
    if collect_item == 'cpu_system_usage':
        return cpu_system_usage
    if cpu_system_usage == 0:
        # Two samples with an identical system counter: avoid dividing by 0.
        return 0.0
    cpu_num = len(old_cpu['cpu_usage']['percpu_usage'])
    return round(
        float(cpu_total_usage) / float(cpu_system_usage) * cpu_num * 100.0, 2)
# Entry point.
#   <container> <item>         -> print the metric from check_container_stats
#   <container> port <number>  -> print 0 if something listens on the port
#                                 inside the container, non-zero otherwise
#   anything else              -> print 1
if __name__ == "__main__":
    # Module-level client used by check_container_stats().
    docker_client = Client(base_url='unix://var/run/docker.sock', version='1.19')
    if len(sys.argv) == 3:
        container_name = sys.argv[1]
        collect_item = sys.argv[2]
        print(check_container_stats(container_name, collect_item))
    elif len(sys.argv) == 4 and sys.argv[2] == 'port':
        container_name = sys.argv[1]
        collect_item = int(sys.argv[3])
        # ':<port> ' anchors the match on the local-address column so that
        # port 80 no longer matches 8080; stderr is dropped so the only
        # output is grep's exit status.
        check_stat = commands.getoutput(
            "/usr/bin/docker exec %s netstat -ntpul 2>/dev/null"
            "|grep ':%s ' > /dev/null;echo $?" % (container_name, collect_item))
        print(check_stat)
    else:
        print('1')
说明:上面脚本为通过python的docker模块去抓取数据,由于各种原因,有些机器无法安装python模块,可通过下面脚本实现:
# cat /opt/zabbix/script/zabbix_monitor_docker.py
#!/usr/bin/envpython
#-*- coding:utf-8 -*-
#email:[email protected]
import sys
importsubprocess
import time
import commands
import re
def get_memory_container_dir(memory_dir, container_name):
    """Return the memory cgroup directory of *container_name*.

    Args:
        memory_dir: root of Docker's memory cgroup hierarchy.
        container_name: container name as known to Docker.

    Returns:
        ``memory_dir + '/' + <full container id>``.
    """
    # Ask Docker for the full id directly. The previous
    # `docker ps|grep name` + `ls|grep id` lookup matched substrings, so a
    # container "web" also matched "web2" and produced a multi-line id.
    # NOTE(review): container_name is interpolated into a shell command.
    con_full_id = commands.getoutput(
        "sudo /usr/bin/docker inspect --format '{{.Id}}' %s" % container_name
    ).strip()
    return memory_dir + '/' + con_full_id
def get_cpu_container_dir(cpu_dir, container_name):
    """Return the cpuacct cgroup directory of *container_name*.

    Args:
        cpu_dir: root of Docker's cpuacct cgroup hierarchy.
        container_name: container name as known to Docker.

    Returns:
        ``cpu_dir + '/' + <full container id>``.
    """
    # Resolve the full id through `docker inspect` instead of grepping
    # `docker ps` output, which matched container names by substring.
    # NOTE(review): container_name is interpolated into a shell command.
    con_full_id = commands.getoutput(
        "sudo /usr/bin/docker inspect --format '{{.Id}}' %s" % container_name
    ).strip()
    return cpu_dir + '/' + con_full_id
def get_cpu_info(container_name):
    """Fetch one stats snapshot of a container from the Docker socket.

    Sends ``GET /containers/<name>/stats?stream=false`` through ``nc`` and
    keeps the response line that contains ``read`` (the JSON body).

    Returns:
        The decoded stats document as a dict.

    Raises:
        ValueError: if no JSON line came back or it cannot be decoded.
    """
    import json  # local import: the script header does not import json

    # The \r\n sequences are real CR/LF characters inside the shell string,
    # exactly as before; only the fused tokens got their spaces back.
    info = commands.getoutput(
        'echo -ne "GET /containers/%s/stats?stream=false HTTP/1.1\r\n\r\n"'
        '|sudo /usr/bin/nc -U /var/run/docker.sock|grep read' % container_name)
    if not info.strip():
        raise ValueError('no stats returned for container %s' % container_name)
    # json.loads instead of eval(): never execute text read from a socket.
    return json.loads(info)
def _cpu_samples(container_name):
    """Take two stats snapshots and return ``(old, new, interval_seconds)``."""
    old_result = get_cpu_info(container_name)
    new_result = get_cpu_info(container_name)
    # 'read' is split on ':' and '.', and element 2 is used as the seconds
    # field of the sample timestamp.
    old_s = int(re.split(r'\:|\.', old_result['read'])[2])
    new_s = int(re.split(r'\:|\.', new_result['read'])[2])
    # Modulo 60 keeps the real distance when the seconds wrap past a minute
    # (59 -> 01 is 2s); identical seconds keep the historical fallback of 2.
    time_interval = (new_s - old_s) % 60 or 2
    return old_result, new_result, time_interval


def _read_network_counters(container_name, interface):
    """Return the cumulative ``(rx_bytes, tx_bytes)`` of *interface*."""
    # NOTE(review): both values are interpolated into a shell command line;
    # only trusted names should reach this script.
    command = (
        "sudo /usr/bin/docker exec %s ifconfig %s|grep bytes"
        "|awk -F':' '{print $2,$3}'|awk '{print $1,$6}'"
    ) % (container_name, interface)
    pipe = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
    fields = pipe.communicate()[0].split()
    if len(fields) < 2:
        raise ValueError(
            'no byte counters found for %s in container %s'
            % (interface, container_name))
    return int(fields[0]), int(fields[1])


def check_container_stats(container_name, collect_item, interface='eth1'):
    """Return one metric of a container without using the docker module.

    CPU items come from the Docker socket (``get_cpu_info``), memory items
    from the memory cgroup files, network items from ``ifconfig`` inside the
    container.

    Args:
        container_name: name of the container to inspect.
        collect_item: one of ``cpu_total_usage``, ``cpu_system_usage``,
            ``cpu_percent``, ``mem_usage``, ``mem_limit``, ``mem_percent``,
            ``network_rx_bytes``, ``network_tx_bytes``.
        interface: interface inside the container used for the network
            items (defaults to ``eth1``, the value that was hard-coded).

    Returns:
        Per-second counter rates for ``cpu_total_usage``/``cpu_system_usage``,
        the command output (a string) for ``mem_usage``/``mem_limit``, a
        percentage rounded to two decimals for the ``*_percent`` items, and
        the byte delta over one second for the network items.

    Raises:
        ValueError: if *collect_item* is not supported or the network
            counters cannot be read.
    """
    if collect_item in ('cpu_total_usage', 'cpu_system_usage', 'cpu_percent'):
        old_result, new_result, time_interval = _cpu_samples(container_name)
        old_cpu = old_result['cpu_stats']
        new_cpu = new_result['cpu_stats']
        cpu_total_usage = (new_cpu['cpu_usage']['total_usage']
                           - old_cpu['cpu_usage']['total_usage'])
        cpu_system_usage = (new_cpu['system_cpu_usage']
                            - old_cpu['system_cpu_usage'])
        if collect_item == 'cpu_total_usage':
            # Floor division keeps the integer result the script always had.
            return cpu_total_usage // time_interval
        if collect_item == 'cpu_system_usage':
            return cpu_system_usage // time_interval
        if cpu_system_usage == 0:
            return 0.0
        cpu_num = len(old_cpu['cpu_usage']['percpu_usage'])
        # The sampling interval cancels out of the ratio.
        return round(
            float(cpu_total_usage) / float(cpu_system_usage) * cpu_num * 100.0,
            2)

    if collect_item in ('mem_usage', 'mem_limit', 'mem_percent'):
        # memory_dir is the module-level path set in the __main__ block.
        memory_container_dir = get_memory_container_dir(memory_dir, container_name)
        # '^rss ' (with the space) keeps 'rss_huge' out of the match.
        usage_command = (
            "cat %s/memory.stat|grep '^rss '|awk '{print $NF}'"
            % memory_container_dir)
        limit_command = "cat %s/memory.limit_in_bytes" % memory_container_dir
        if collect_item == 'mem_usage':
            return commands.getoutput(usage_command)
        if collect_item == 'mem_limit':
            return commands.getoutput(limit_command)
        mem_usage = commands.getoutput(usage_command)
        mem_limit = commands.getoutput(limit_command)
        return round(float(mem_usage) / float(mem_limit) * 100.0, 2)

    if collect_item in ('network_rx_bytes', 'network_tx_bytes'):
        index = 0 if collect_item == 'network_rx_bytes' else 1
        before = _read_network_counters(container_name, interface)
        time.sleep(1)
        after = _read_network_counters(container_name, interface)
        return after[index] - before[index]

    # Previously this fell through and died with UnboundLocalError.
    raise ValueError('unsupported collect item: %s' % collect_item)
# Entry point.
#   <container> <item>         -> print the metric from check_container_stats
#   <container> port <number>  -> print 0 if something listens on the port
#                                 inside the container, non-zero otherwise
#   anything else              -> print 1
if __name__ == "__main__":
    # cgroup roots; memory_dir is read as a global by check_container_stats().
    cpu_dir = "/cgroup/cpuacct/docker"
    memory_dir = "/cgroup/memory/docker"
    if len(sys.argv) == 3:
        container_name = sys.argv[1]
        collect_item = sys.argv[2]
        print(check_container_stats(container_name, collect_item))
    elif len(sys.argv) == 4 and sys.argv[2] == 'port':
        container_name = sys.argv[1]
        collect_item = int(sys.argv[3])
        # ':<port> ' anchors the match on the local-address column so that
        # port 80 no longer matches 8080; stderr is dropped so the only
        # output is grep's exit status.
        check_stat = commands.getoutput(
            "sudo /usr/bin/docker exec %s netstat -ntpul 2>/dev/null"
            "|grep ':%s ' > /dev/null;echo $?" % (container_name, collect_item))
        print(check_stat)
    else:
        print('1')
脚本2,用来发现容器名
# cat discovery_cons.py
#!/usr/bin/env python
# Felix Shang
#QQ: 279379936
import commands
import sys
def docker_s():
    """Return the names of the running containers.

    Returns:
        A list with the last column of every ``docker ps`` row (the
        container name), or ``0`` when no container is running; callers
        compare the result against ``0``.
    """
    # `grep -v` drops the header row; str.split() copes with the newlines,
    # so no `tr` stage is needed.
    cons = commands.getoutput(
        """sudo /usr/bin/docker ps|grep -v "CONTAINER ID"|awk '{print $NF}'""")
    names = cons.split()
    if names:
        return names
    return 0
# Entry point: `discovery_cons.py docker` prints the Zabbix low-level
# discovery document listing one {#CONTAINERNAME} entry per container.
# Nothing is printed for other arguments or when no container is running.
if __name__ == "__main__":
    if len(sys.argv) == 2 and sys.argv[1] == 'docker':
        infos = docker_s()
        if infos != 0:
            print('{')
            print('\t"data":[')
            final_index = len(infos) - 1
            for position, con in enumerate(infos):
                # Every entry except the last one carries a trailing comma.
                if position != final_index:
                    print('\t\t{"{#CONTAINERNAME}":"%s"},' % con)
                else:
                    print('\t\t{"{#CONTAINERNAME}":"%s"}' % con)
            print('\t]')
            print('}')
脚本3,用来发现容器的服务(tomcat nginx),之前脚本2和脚本3是一个脚本,发现容器时出现好多问题。
# cat /opt/zabbix/script/discovery_docker_service.py
#!/usr/bin/env python
# Felix Shang
#QQ: 279379936
import commands
import sys
def docker_s():
    """Return the container names recorded in docker_cons.txt.

    The file holds the discovery document written by the container
    discovery script; the fourth double-quote-delimited field of every
    ``CONTAINERNAME`` line is taken as the container name.

    Returns:
        A list of names, or ``0`` when none were found; callers compare
        the result against ``0``.
    """
    # str.split() copes with the newlines, so no `tr` stage is needed.
    cons = commands.getoutput(
        """cat /opt/zabbix/script/docker_cons.txt|grep 'CONTAINERNAME'"""
        """|grep -v grep|awk -F'"' '{print $4}'""")
    names = cons.split()
    if names:
        return names
    return 0
def tomcat_s():
    """Map every known container to the HTTP ports of its Tomcat instances.

    For each container from ``docker_s()`` that runs a java/tomcat process,
    the configuration directory is derived from the process command line and
    the Connector ports are read from its ``server.xml`` (shutdown and AJP
    entries are skipped).

    Returns:
        ``{container_name: [port, ...]}``; the list is empty for containers
        without Tomcat.

    Exits with status 2 when ``docker_s()`` reports no containers.
    """
    cons = docker_s()
    if cons == 0:
        sys.exit(2)
    cons_d = {}
    for con in cons:
        # NOTE(review): container names are interpolated into shell commands.
        stat = commands.getoutput(
            "sudo /usr/bin/docker exec %s ps -ef|grep java|grep tomcat"
            "|grep -v grep>/dev/null;echo $?" % con)
        port_list = []
        if int(stat) == 0:
            # Text between the first '=' and 'logging' on the command line is
            # used as the conf directory (trailing slash included).
            tomcat_config_dirs = commands.getoutput(
                "sudo /usr/bin/docker exec %s ps -ef | grep tomcat "
                "|grep -v grep | awk -F\\= '{print $2}' "
                "| awk -F'logging' '{print $1}'" % con).split()
            for tomcat_config_dir in tomcat_config_dirs:
                tomcat_config_file = tomcat_config_dir + 'server.xml'
                ports = commands.getoutput(
                    """sudo /usr/bin/docker exec %s grep "port=" %s"""
                    """|grep -v "shutdown"|grep -v "AJP"|grep "Connector\""""
                    """|awk -F\\= '{print $2}'|awk '{print $1}'"""
                    % (con, tomcat_config_file))
                # One entry per Connector line; stripping only the outer
                # quotes of the whole output used to glue several ports
                # into a single bogus value.
                for port in ports.split():
                    port = port.strip('"')
                    if port:
                        port_list.append(port)
        cons_d[con] = port_list
    return cons_d
def nginx_s():
    """Map every known container to the ports its nginx listens on.

    For each container from ``docker_s()`` that runs an nginx process, the
    listening ports are taken from ``netstat -ntpul`` (lines mentioning
    40080 are excluded, as before).

    Returns:
        ``{container_name: [port, ...]}``; the list is empty for containers
        without nginx.

    Exits with status 2 when ``docker_s()`` reports no containers.
    """
    cons = docker_s()
    if cons == 0:
        sys.exit(2)
    cons_d = {}
    for con in cons:
        # NOTE(review): container names are interpolated into shell commands.
        stat = commands.getoutput(
            "sudo /usr/bin/docker exec %s ps -ef|grep nginx"
            "|grep -v grep>/dev/null;echo $?" % con)
        port_list = []
        if int(stat) == 0:
            # Last ':'-separated piece of the local-address column is the
            # port; str.split() copes with the newlines.
            port_list = commands.getoutput(
                "sudo /usr/bin/docker exec %s netstat -ntpul|grep nginx"
                "|grep -v 40080|awk '{print $4}'|awk -F\\: '{print $NF}'"
                % con).split()
        cons_d[con] = port_list
    return cons_d
def _print_discovery(infos, port_macro):
    """Print a Zabbix low-level discovery document for container ports.

    Args:
        infos: ``{container_name: [port, ...]}``; containers with an empty
            list contribute no entry.
        port_macro: LLD macro that carries the port, e.g.
            ``{#CON_TOMCAT_PORT}``.
    """
    entries = []
    for con_info in infos:
        for port in infos[con_info]:
            entries.append(
                '\t\t{"{#CONTAINERNAME}":"%s","%s":"%s"}'
                % (con_info, port_macro, port))
    print('{')
    print('\t"data":[')
    if entries:
        # Comma between entries, none after the last one.
        print(',\n'.join(entries))
    print('\t]')
    print('}')


# Entry point: `tomcat` or `nginx` selects which service to discover.
# Any other invocation prints nothing, as before.
if __name__ == "__main__":
    if len(sys.argv) == 2 and sys.argv[1] == 'tomcat':
        _print_discovery(tomcat_s(), '{#CON_TOMCAT_PORT}')
    elif len(sys.argv) == 2 and sys.argv[1] == 'nginx':
        _print_discovery(nginx_s(), '{#CON_NGINX_PORT}')
#vim /etc/sudoers //zabbix_agent是通过zabbix用户执行,通过sudo提权让zabbix用户对脚本有执行权限。
zabbix ALL=(root) NOPASSWD:/usr/bin/docker,/sbin/fdisk,/usr/sbin/dmidecode,/usr/bin/nc
Zabbix服务端的配置:
导入模板:Template docker, 宿主机关联此模板即可。
报错:
Server获取值报错:[ZBX_NOTSUPPORTED] [Timeout while executing a shell script.]
# vim zabbix_agentd.conf
# 设置超时时间
Timeout=30