(一) 监控工具安装(sysstat、ifstat、iftop、iotop等)
(1) sysstat系统性能监控工具(sar、pidstat、iostat、mpstat等)
wget -P ./ http://pagesperso-orange.fr/sebastien.godard/sysstat-9.1.1.tar.gz
tar -zxvf sysstat-9.1.1.tar.gz&&cd sysstat-9.1.1&&./configure --prefix=/usr/local/sysstat/ --enable-install-cron&&make&&make install
(2) ifstat监控网络流量工具
wget -P ./ http://gael.roualland.free.fr/ifstat/ifstat-1.1.tar.gz
tar -zxvf ifstat-1.1.tar.gz &&cd ifstat-1.1&&./configure --prefix=/usr/local/ifstat&&make&&make install
(3) iftop监控网络流量工具
CentOS上安装所需依赖包:
yum install libpcap libpcap-devel ncurses ncurses-devel flex byacc
Debian上安装所需依赖包:
apt-get install libpcap-dev libncurses5-dev flex byacc
下载iftop的源代码安装:
wget http://www.ex-parrot.com/~pdw/iftop/download/iftop-0.17.tar.gz
tar zxvf iftop-0.17.tar.gz && cd iftop-0.17 && ./configure --prefix=/usr/local/iftop && make && make install
(4) 设置PATH
vi /etc/profile
PATH=$PATH:/usr/local/sysstat/bin/:/usr/local/ifstat/bin/:/usr/local/iftop/sbin/
export PATH
. /etc/profile
(5) Linux进程的实时IO读写情况(pidstat、python脚本、iotop)
Kernel < 2.6.20
#!/usr/bin/python
# Monitoring per-process disk I/O activity
# written by http://www.vpsee.com
import sys, os, time, signal, re
class DiskIO:
    """Per-process tally of block-layer READ/WRITE events.

    Attributes:
        pname:  task name, as passed by the caller.
        pid:    process id, as passed by the caller (main() passes the
                string captured by its regex, not an int).
        reads:  number of READ events counted so far.
        writes: number of WRITE events counted so far.
    """

    def __init__(self, pname=None, pid=None, reads=0, writes=0):
        self.pname = pname
        self.pid = pid
        # Honour the caller-supplied starting counts. They default to 0, so
        # existing two-argument callers see no change.
        self.reads = reads
        self.writes = writes
def main():
    """Print per-process block I/O counts roughly once a second, forever.

    Takes no command-line arguments and must run as root. Writes 1 to
    /proc/sys/vm/block_dump, then loops: dump-and-clear the kernel log with
    `dmesg -c` into /tmp/diskio.log, count the READ/WRITE lines per PID, sleep
    one second, and print one row per PID seen in that interval. The SIGINT
    handler (signal_handler) is what ends the loop.

    Written to run on old Python 2 interpreters as well as Python 3: print is
    called with a single parenthesised argument, and the log file is closed
    with try/finally rather than a `with` statement.
    """
    argc = len(sys.argv)
    if argc != 1:
        print("usage: ./iotop")
        sys.exit(0)
    if os.getuid() != 0:
        print("must be run as root")
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    os.system('echo 1 > /proc/sys/vm/block_dump')
    print("TASK PID READ WRITE")

    # Raw string: same pattern text, without relying on unknown string escapes.
    pattern = re.compile(
        r'^(\S+)\((\d+)\): (READ|WRITE) block (\d+) on (\S+)')

    while True:
        os.system('dmesg -c > /tmp/diskio.log')
        l = []        # records in first-seen order, used for printing
        by_pid = {}   # pid string -> record, for direct lookup
        f = open('/tmp/diskio.log', 'r')
        try:
            for line in f:
                m = pattern.match(line)
                if m is None:
                    continue
                pid = m.group(2)
                item = by_pid.get(pid)
                if item is None:
                    item = DiskIO(m.group(1), pid)
                    by_pid[pid] = item
                    l.append(item)
                # Count every matching line, including the one that created
                # the record; otherwise each PID's first event is lost.
                if m.group(3) == "READ":
                    item.reads = item.reads + 1
                else:
                    # The regex only admits READ or WRITE.
                    item.writes = item.writes + 1
        finally:
            f.close()
        time.sleep(1)
        for item in l:
            print("%-10s %10s %10d %10d" %
                  (item.pname, item.pid, item.reads, item.writes))
def signal_handler(signal, frame):
    """Signal handler: write 0 to /proc/sys/vm/block_dump, then exit with 0.

    Both arguments are the standard (signal number, stack frame) pair that the
    signal module passes to handlers; neither is used here.
    """
    os.system('echo 0 > /proc/sys/vm/block_dump')
    sys.exit(0)
# Run the monitor only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Kernel >= 2.6.20 && Python V>2.5 : iotop
(二) 性能数据获取脚本
#!/bin/bash
# Performance data collection script.
#
# Polls once a second until both the "$proc_name-master" and
# "$proc_name-syslog" processes are running. It then records their PIDs,
# kills any sampler instances that are already running, and starts top,
# iostat, pidstat and sar in the background, each appending to a log file
# under $data_path. The script itself exits as soon as the samplers have been
# launched.

userid=root # referenced only by the commented-out per-user top command below
proc_name=XXX
proc_path=""
# Take the date stamp once so the directory name and every file name agree,
# even if the wait loop below runs past midnight.
date_str=$(date +%Y%m%d)
data_path=/home/collector/wugang/data_${date_str}
filter_port=":22" # referenced only by the commented-out netstat command below
export data_path
mkdir -p "$data_path"

# Print the PID (column 2 of `ps aux`) of the first process whose ps line
# matches the pattern in $1. Prints nothing when no process matches.
first_pid() {
  ps aux | grep -- "$1" | grep -v grep | awk 'NR<=1 {print $2}'
}

while :; do
  # Resolve the PID of each component from its process name.
  proc_pid1=$(first_pid "$proc_name-master")
  proc_pid2=$(first_pid "$proc_name-thrift")
  proc_pid3=$(first_pid "$proc_name-xpc")
  proc_pid4=$(first_pid "$proc_name-syslog")

  # Only the master and syslog components are required before sampling starts.
  if [ -n "$proc_pid1" ] && [ -n "$proc_pid4" ]; then
    ##########################################################################
    # Resolve the executable path from the PID.
    proc_path=$(readlink "/proc/$proc_pid1/exe")

    # thrift/xpc may not be running. Join only the PIDs that were found so
    # top and pidstat never receive an empty element such as "1,,,4".
    pid_list=$proc_pid1
    for pid in "$proc_pid2" "$proc_pid3" "$proc_pid4"; do
      if [ -n "$pid" ]; then
        pid_list="$pid_list,$pid"
      fi
    done

    str="[$(date +%H%M%S)] $proc_path;$proc_name-master:$proc_pid1;$proc_name-thrift:$proc_pid2;$proc_name-xpc:$proc_pid3;$proc_name-syslog:$proc_pid4"
    echo "$str" >> "$data_path/sys_proc_${date_str}.txt"
    # Read back by the CSV conversion script; keep the 4-column layout.
    echo "$proc_pid1 $proc_pid2 $proc_pid3 $proc_pid4" > "$data_path/last_pid_${date_str}.txt"
    echo "" > "$data_path/top_proc_${date_str}.log"

    # Stop samplers left over from a previous run.
    # NOTE(review): this kills every matching process on the host, not only
    # the ones this script started.
    for tool in mpstat netstat top; do
      /usr/bin/killall -9 "$tool"
    done

    # Port status
    #netstat -tn 1|egrep $filter_port >> $data_path/netstat_$(date +%Y%m%d).log&
    # CPU status
    #mpstat -u -P ALL 1 10000 >> $data_path/mpstat_cpu_$(date +%Y%m%d).log&
    # Per-process resource usage
    #top -d5 -u $userid -b -n 10000 >> $data_path/top_cpu_$(date +%Y%m%d).log&
    top -p "$pid_list" -d20 -b -n 30000 >> "$data_path/top_proc_${date_str}.log" &
    ##########################################################################

    for tool in iostat pidstat ifstat sar; do
      /usr/bin/killall -9 "$tool"
    done

    # Start each log with a header line (this truncates any existing log).
    echo "Device: tps kB_read/s kB_wrtn/s kB_read kB_wrtn" > "$data_path/iostat_tps_${date_str}.log"
    echo "Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util" > "$data_path/iostat_response_${date_str}.log"
    echo "avg-cpu: %user %nice %system %iowait %steal %idle" > "$data_path/iostat_cpu_${date_str}.log"
    echo "Time,PID,%usr,%system,%CPU,VSZ,RSS,%MEM,kB_rd/s,kB_wr/s,Command" > "$data_path/proc_info_${date_str}.log"
    #echo "HH:MM:SS,KB/sin, KB/sout">$data_path/network_io_$(date +%Y%m%d).log;
    echo "HH:MM:SS,IFACE,rxpck/s,txpck/s,rxkB/s,txkB/s" > "$data_path/network_io_${date_str}.log"

    # TPS and throughput
    iostat -N -t -k -d 20 30000 | grep sda >> "$data_path/iostat_tps_${date_str}.log" &
    # Device utilisation (%util) and response time (await)
    iostat -N -t -x -k -d 20 30000 | grep sda >> "$data_path/iostat_response_${date_str}.log" &
    # CPU status
    iostat -c 20 30000 >> "$data_path/iostat_cpu_${date_str}.log" &
    # Per-process resource usage, reshaped into CSV columns
    pidstat -u -r -d -h -p "$pid_list" 20 30000 \
      | awk '{print strftime("%T",$1)","$2","$3","$4","$6","$10","$11","$12","$13","$14","$16}' \
        >> "$data_path/proc_info_${date_str}.log" &
    # Network traffic
    #ifstat -t|awk '{print $1","$2","$3}' >> $data_path/network_io_$(date +%Y%m%d).log&
    sar -n DEV 10 30000 \
      | awk '{print $1""$2","$3","$4","$5","$6","$7}' \
        >> "$data_path/network_io_${date_str}.log" &

    break
  fi
  sleep 1s
done
(三) 原始数据处理脚本(To CSV)
#!/bin/bash
# Raw-data post-processing script (collector logs -> CSV).
#
# Reads today's logs from $data_path and writes one CSV per data source into
# $csv_path: per-PID top and pidstat extracts, network I/O for eth0, and three
# iostat views. Finally copies the sys_proc and last_pid text files alongside.

date_str=$(date +%Y%m%d)
data_path=/home/collector/wugang/data_"$date_str"
csv_path=/home/collector/wugang/csv_"$date_str"
mkdir -p "$csv_path"

# Write the top and pidstat CSVs for one PID.
# Arguments: $1 - PID to extract
# Globals:   data_path, csv_path, date_str (read)
export_pid_csv() {
  local pid=$1
  local top_csv=$csv_path/top_proc_${pid}_${date_str}.csv
  local info_csv=$csv_path/proc_info_${pid}_${date_str}.csv

  # The first two greps are the original header filters.
  # NOTE(review): `grep -iv u` also drops any process row that contains the
  # letter "u" anywhere; confirm that is acceptable.
  # The PID is compared against the first field exactly, so PID 123 no longer
  # picks up rows for 5123 or rows where another column ends in "123 ".
  echo "TOP-PID,VIRT,RES,%MEM,%CPU,TIME+,COMMAND" > "$top_csv" \
    && grep -iv u "$data_path/top_proc_${date_str}.log" \
      | grep 0 \
      | awk -v pid="$pid" '$1 == pid {print $1","$5","$6","$10","$9","$11","$12}' \
        >> "$top_csv"

  # Column 2 of the pidstat-derived log is the PID; match it exactly.
  echo "Time,PID,%usr,%system,%CPU,VSZ,RSS,%MEM,kB_rd/s,kB_wr/s,Command" > "$info_csv" \
    && grep -v ",," "$data_path/proc_info_${date_str}.log" \
      | grep -v "#" \
      | awk -F, -v pid="$pid" '$2 == pid' \
        >> "$info_csv"
}

# The collector writes up to four whitespace-separated PIDs on one line.
pid_file=$data_path/last_pid_${date_str}.txt
pids=()
if [[ -r "$pid_file" ]]; then
  read -r -a pids < "$pid_file"
else
  echo "warning: cannot read $pid_file; skipping per-process CSVs" >&2
fi

for pid in "${pids[@]:0:4}"; do
  export_pid_csv "$pid"
done

echo "HH:MM:SS,IFACE,rxpck/s,txpck/s,rxkB/s,txkB/s" > "$csv_path/network_io_new_${date_str}.csv" \
  && grep eth0 "$data_path/network_io_${date_str}.log" \
    | grep -v ",," \
    | grep -v "IF" \
      >> "$csv_path/network_io_new_${date_str}.csv"

# `grep -v D` removes the "Device:" header line written by the collector.
echo "Device:,tps,kB_read/s,kB_wrtn/s,kB_read,kB_wrtn" > "$csv_path/iostat_tps_${date_str}.csv" \
  && grep -v D "$data_path/iostat_tps_${date_str}.log" \
    | awk '{print $1","$2","$3","$4","$5","$6}' \
      >> "$csv_path/iostat_tps_${date_str}.csv"

echo "Device:,rrqm/s,wrqm/s,r/s,w/s,rkB/s,wkB/s,avgrq-sz,avgqu-sz,await,svctm,%util" > "$csv_path/iostat_response_${date_str}.csv" \
  && grep -v D "$data_path/iostat_response_${date_str}.log" \
    | awk '{print $1","$2","$3","$4","$5","$6","$7","$8","$9","$10","$11","$12}' \
      >> "$csv_path/iostat_response_${date_str}.csv"

# The trailing `grep -v ",,"` drops rows with fewer than six fields.
echo "avg-cpu:%user,%nice,%system,%iowait,%steal,%idle" > "$csv_path/iostat_cpu_${date_str}.csv" \
  && grep -iv D "$data_path/iostat_cpu_${date_str}.log" \
    | awk '{print $1","$2","$3","$4","$5","$6}' \
    | grep -v ",," \
      >> "$csv_path/iostat_cpu_${date_str}.csv"

cp -fr "$data_path"/sys_proc*.txt "$csv_path"/
cp -fr "$data_path"/last_pid*.txt "$csv_path"/
(四) 根据CSV数据作图(Excel,Other Way)
(五) 结果分析和结论