My Hadoop platform was a plain, hand-rolled installation, and after setting up each module I found there was no unified monitoring UI. Searching online suggested Ganglia was a good choice, so I trawled through a pile of tutorials on monitoring with Ganglia, hit all kinds of errors, and still could not get the result I wanted. In the end I fell back on the most basic approach: settle down, read the official documentation, understand what each parameter means, and try the combinations one by one. Just before quitting time on the third day, I finally got the result I was after. My understanding may still not be entirely accurate, but I am recording my installation process here so that later work has something to refer back to.
My Hadoop environment is as follows: NameNode HA on masternode1/masternode2; DataNodes on slavenode1~7
HBase: slave nodes on slavenode1~4; HA masters on masternode1/masternode2
Spark: slavenode1~7
My Ganglia setup: slavenode8 runs the Ganglia server; the client is installed on every Hadoop machine
1. Install the required packages
yum install -y gcc gcc-c++ libpng freetype zlib libdbi apr* libxml2-devel pkg-config glib pixman pango pango-devel freetype-devel
2. Download, build, and install expat
wget http://jaist.dl.sourceforge.net/project/expat/expat/2.1.0/expat-2.1.0.tar.gz
tar -xf expat-2.1.0.tar.gz && cd expat-2.1.0 && ./configure --prefix=/usr/local/expat && make && make install && cd ..
[root@slavenode8 hadoop]# mkdir /usr/local/expat/lib64 && cp -a /usr/local/expat/lib/* /usr/local/expat/lib64/
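A quick sanity check that the 64-bit copy worked (the file names assume the default expat 2.1.0 build):
ls -l /usr/local/expat/lib64/libexpat.so*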
3. Download, build, and install confuse
1) wget http://ftp.twaren.net/Unix/NonGNU//confuse/confuse-2.7.tar.gz
2) tar -xf confuse-2.7.tar.gz && cd confuse-2.7 && ./configure CFLAGS=-fPIC --disable-nls --prefix=/usr/local/confuse && make && make install && cd ..
On 64-bit machines, the shared libraries need to be copied:
mkdir -p /usr/local/confuse/lib64 && cp -a -f /usr/local/confuse/lib/* /usr/local/confuse/lib64/
4. Install libconfuse
wget http://pkgs.repoforge.org/libconfuse/libconfuse-devel-2.6-2.el5.rf.x86_64.rpm
wget http://pkgs.repoforge.org/libconfuse/libconfuse-2.6-2.el5.rf.x86_64.rpm
rpm -ivh libconfuse-*
5. Install pcre
wget http://ftp.exim.llorien.org/pcre/pcre-8.33.tar.gz
tar zxvf pcre-8.33.tar.gz && cd pcre-8.33 && ./configure --prefix=/usr --libdir=/usr/lib64 && make && make install
6. Install RRDTool, which stores the data Ganglia collects
cd /opt/hadoop
wget http://oss.oetiker.ch/rrdtool/pub/rrdtool.tar.gz
tar zxvf rrdtool* && cd rrdtool-* && ./configure --prefix=/usr && make -j8 && make install
which rrdtool
ldconfig -p | grep rrd
7. Install the Ganglia server (gmetad)
wget http://jaist.dl.sourceforge.net/project/ganglia/ganglia%20monitoring%20core/3.6.0/ganglia-3.6.0.tar.gz
tar -xf ganglia-3.6.0.tar.gz && cd ganglia-3.6.0
./configure --with-gmetad --sysconfdir=/etc/ganglia --disable-python
make && make install
(If you hit the error /usr/local/lib/libpython2.7.a: could not read symbols: Bad value, add --disable-python at configure time.)
cp gmond/gmond.init /etc/rc.d/init.d/gmond
cp gmetad/gmetad.init /etc/rc.d/init.d/gmetad
chkconfig --add gmond && chkconfig gmond on
chkconfig --add gmetad && chkconfig gmetad on
Edit /etc/rc.d/init.d/gmetad and /etc/rc.d/init.d/gmond and set the following paths, respectively:
vi /etc/rc.d/init.d/gmetad
GMETAD=/usr/local/sbin/gmetad
vi /etc/rc.d/init.d/gmond
GMOND=/usr/local/sbin/gmond
mkdir /etc/ganglia
gmond -t | tee /etc/ganglia/gmond.conf   # gmond -t prints a default configuration template
cp gmetad/gmetad.conf /etc/ganglia/
mkdir -p /var/lib/ganglia/rrds
chown root:root /var/lib/ganglia/rrds
8. Configure Ganglia
Ganglia's monitoring is configured by editing the gmond.conf and gmetad.conf files under /etc/ganglia.
gmetad.conf:
1) vi /etc/ganglia/gmetad.conf
setuid_username "root"
case_sensitive_hostnames 1
data_source "hadoop_namenode" masternode1:8649 masternode2:8649
data_source "hadoop_datanode" slavenode1:8650 slavenode2:8650 slavenode3:8650 slavenode4:8650 slavenode5:8650 slavenode6:8650 slavenode7:8650 此端口配置客户端时要用到
gridname "cluster-ha"
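Once the client gmonds are up (see the client installation further down), it is worth verifying that gmetad can actually poll each data_source. A minimal check, assuming nc (netcat) is available; a healthy gmond answers on its TCP port with an XML dump of its metrics:
nc masternode1 8649 | head -20
nc slavenode1 8650 | head -20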
9. Install ganglia-web:
wget http://sourceforge.net/projects/ganglia/files/ganglia-web/3.6.2/ganglia-web-3.6.2.tar.gz/download
[root@slavenode8 hadoop]# cp /home/hadoop/ganglia-web-3.6.2.tar.gz .
[root@slavenode8 hadoop]# tar -zxf ganglia-web-3.6.2.tar.gz && cp -r ganglia-web-3.6.2 /var/www/html/ganglia/ # this is the web interface
cd /var/www/html/ganglia/
vim Makefile, change the GDESTDIR parameter, then run make install:
GDESTDIR = /var/www/html/ganglia
make install
cp conf_default.php conf.php
vim conf.php and change the following parameters:
$conf['gmetad_root'] = "/var/lib/ganglia";
$conf['rrds'] = "${conf['gmetad_root']}/rrds";
$conf['rrdtool'] = "/usr/bin/rrdtool";
$conf['external_location'] = "http://slavenode8/ganglia"; // replace with your own server's hostname or IP
$conf['case_sensitive_hostnames'] = false;
[root@slavenode8 confuse-2.7]# vi /etc/httpd/conf/httpd.conf
AllowOverride all
Options None
Order allow,deny
Allow from all
ServerName slavenode8:80
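For reference, the four access directives above normally live inside a Directory stanza for the ganglia web root; a sketch assuming Apache 2.2 syntax (on Apache 2.4, Order/Allow would be replaced by Require all granted):
<Directory "/var/www/html/ganglia">
    AllowOverride All
    Options None
    Order allow,deny
    Allow from all
</Directory>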
Start Apache:
[root@slavenode8 confuse-2.7]# service httpd restart
Start the Ganglia daemons and access the web UI:
service gmetad start
service gmond start
http://slavenode8/ganglia/ or http://192.168.237.239/ganglia/ // replace with your own Ganglia server's IP
Client installation
1. Install dependencies
[root@slavenode3 confuse-2.7]# yum -y install apr-devel apr-util check-devel cairo-devel pango-devel libxml2-devel rpm-build glib2-devel dbus-devel freetype-devel fontconfig-devel gcc-c++ expat-devel python-devel libXrender-devel pkgconfig
2. Install confuse-2.7
[root@slavenode1 hadoop]# wget http://ftp.twaren.net/Unix/NonGNU//confuse/confuse-2.7.tar.gz
[root@masternode1 confuse-2.7]# tar xf confuse-2.7.tar.gz && cd /opt/hadoop/confuse-2.7 && ./configure CFLAGS=-fPIC --disable-nls && make && make install
3. Install pcre
[root@slavenode1 hadoop]# wget http://ftp.exim.llorien.org/pcre/pcre-8.33.tar.gz
tar zxvf pcre-8.33.tar.gz && cd pcre-8.33 && ./configure --prefix=/usr --libdir=/usr/lib64 && make && make install
4. Install rrdtool
[root@masternode2 hadoop]# tar -xf rrdtool.tar.gz && cd rrdtool-1.4.9 && ./configure --prefix=/usr && make -j8 && make install
5. Install the gmond client
[root@slavenode1 hadoop]#
wget http://jaist.dl.sourceforge.net/project/ganglia/ganglia%20monitoring%20core/3.6.0/ganglia-3.6.0.tar.gz
[root@masternode1 hadoop]# tar xf ganglia-3.6.0.tar.gz && cd /opt/hadoop/ganglia-3.6.0 && ./configure --sysconfdir=/etc/ganglia --disable-python && make && make install
cp gmond/gmond.init /etc/rc.d/init.d/gmond
sed -i 's/GMOND=\/usr\/sbin\/gmond/GMOND=\/usr\/local\/sbin\/gmond/g' /etc/rc.d/init.d/gmond
cat /etc/rc.d/init.d/gmond
GMOND=/usr/local/sbin/gmond
[root@masternode1 ganglia-3.6.0]# gmond -t | tee /etc/ganglia/gmond.conf
Configure gmond.conf on masternode1 and masternode2 as follows:
vi /etc/ganglia/gmond.conf
user = root
send_metadata_interval = 15
cluster {
name = "hadoop_namenode"
owner = "root"
latlong = "unspecified"
url = "unspecified"
}
udp_send_channel {
host=masternode1
port = 8649
ttl = 1
}
udp_send_channel {
host=masternode2
port = 8649
ttl = 1
}
udp_recv_channel {
port = 8649
}
tcp_accept_channel {
port = 8649
}
The ports above are 8649 because that is the port assigned to hadoop_namenode in gmetad.conf.
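To confirm a masternode gmond is healthy, start it and query its tcp_accept_channel, which should answer with an XML metric dump. For troubleshooting, gmond can also be run in the foreground at debug level 2 (a sketch; host and config path as set up above):
service gmond start
nc masternode1 8649 | head
gmond -c /etc/ganglia/gmond.conf -d 2   # foreground debug run; Ctrl-C to stop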
Configuration for the other seven machines (slavenode1~slavenode7):
user = root
send_metadata_interval = 15
cluster {
name = "hadoop_datanode" #对应gmetad机器里的名字/etc/ganglia/gmetad.conf里的名字
owner = "root"
latlong = "unspecified"
url = "unspecified"
}
host {
location = "unspecified"
}
udp_send_channel {
host=slavenode1
port = 8650 # must match the hadoop_datanode port in /etc/ganglia/gmetad.conf on the gmetad machine
ttl = 1
}
udp_send_channel {
host=slavenode2
port = 8650
ttl = 1
}
udp_send_channel {
host=slavenode3
port = 8650
ttl = 1
}
udp_send_channel {
host=slavenode4
port = 8650
ttl = 1
}
udp_send_channel {
host=slavenode5
port = 8650
ttl = 1
}
udp_send_channel {
host=slavenode6
port = 8650
ttl = 1
}
udp_send_channel {
host=slavenode7
port = 8650
ttl = 1
}
udp_recv_channel {
port = 8650
retry_bind = true
}
tcp_accept_channel {
port = 8650
gzip_output = no
}
Copy the file to the corresponding machines:
for i in {37,38,32,33,34,35,36};do scp -r /etc/ganglia/gmond.conf root@192.168.237.$i:/etc/ganglia/gmond.conf ; done
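After copying, gmond has to be restarted on each slave for the new configuration to take effect; a sketch using the same 192.168.237.x addressing as the scp loop above:
for i in {37,38,32,33,34,35,36};do ssh root@192.168.237.$i service gmond restart; done
nc slavenode1 8650 | head   # spot-check: should return an XML metric dump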
On every machine, edit the metrics configuration files under the Hadoop config directory:
[root@masternode1 ~]# cat /opt/hadoop/hadoop-2.7.2/etc/hadoop/hadoop-metrics2.properties
namenode.sink.ganglia.servers=slavenode8:8649
resourcemanager.sink.ganglia.servers=slavenode8:8649
mrappmaster.sink.ganglia.servers=slavenode8:8649
jobhistoryserver.sink.ganglia.servers=slavenode8:8649
maptask.sink.ganglia.servers=slavenode8:8649
reducetask.sink.ganglia.servers=slavenode8:8649
# 8650 matches the port of the seven hadoop_datanode machines in gmetad.conf
nodemanager.sink.ganglia.servers=slavenode8:8650
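Note that these *.sink.ganglia.servers lines only take effect if the Ganglia sink class itself is registered in hadoop-metrics2.properties. The cat output above may simply omit it; the stock Hadoop 2.x template registers it roughly like this (a sketch, not verbatim from my file):
*.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31
*.sink.ganglia.period=10
*.sink.ganglia.supportsparse=true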
Copy to the other machines:
for i in {37,38,32,33,34,35,36,31,39};do scp -r /opt/hadoop/hadoop-2.7.2/etc/hadoop/hadoop-metrics2.properties root@192.168.237.$i:/opt/hadoop/hadoop-2.7.2/etc/hadoop/hadoop-metrics2.properties; done
[root@masternode1 ~]# cat /opt/hadoop/hadoop-2.7.2/etc/hadoop/hadoop-metrics.properties
# dfs.class=org.apache.hadoop.metrics.spi.NullContext
dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
dfs.period=10
# slavenode8 is the Ganglia server's hostname
dfs.servers=slavenode8:8649
mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
mapred.period=10
mapred.servers=slavenode8:8649
jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
jvm.period=10
jvm.servers=slavenode8:8649
rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
rpc.period=10
rpc.servers=slavenode8:8649
ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
ugi.period=10
ugi.servers=slavenode8:8649
for i in {37,38,32,33,34,35,36,31,39};do scp -r /opt/hadoop/hadoop-2.7.2/etc/hadoop/hadoop-metrics.properties root@192.168.237.$i:/opt/hadoop/hadoop-2.7.2/etc/hadoop/hadoop-metrics.properties; done
[root@masternode1 ~]# cat /opt/hadoop/hbase-1.1.5/conf/hadoop-metrics2-hbase.properties
hbase.extendedperiod = 3600
hbase.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
hbase.period=10
hbase.servers=slavenode8:8650
jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
jvm.period=10
jvm.servers=slavenode8:8650
# rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
# rpc.period=10
rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
rpc.period=10
rpc.servers=slavenode8:8650
rest.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
rest.period=10
rest.servers=slavenode8:8650
for i in {32,33,34,35,36,31};do scp -r /opt/hadoop/hbase-1.1.5/conf/hadoop-metrics2-hbase.properties root@192.168.237.$i:/opt/hadoop/hbase-1.1.5/conf/hadoop-metrics2-hbase.properties; done
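Finally, after restarting the Hadoop and HBase daemons so they pick up the new metrics files, you can verify end to end that data is flowing by checking the RRD tree on slavenode8; gmetad creates one directory per data_source name from gmetad.conf:
ls /var/lib/ganglia/rrds/
ls /var/lib/ganglia/rrds/hadoop_datanode/   # one subdirectory per reporting host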