[Hadoop 2.7.x] HA Configuration

一、Reference

http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html

二、Cluster layout


Role                                hftest0001   hftest0002   hftest0003   hftest0004
NameNode                            Y            Y            -            -
DataNode                            -            Y            Y            Y
JournalNode                         Y            Y            Y            -
ZooKeeper server (QuorumPeerMain)   Y            Y            Y            -
DFSZKFailoverController (zkfc)      Y            Y            -            -

三、Deployment

1. Passwordless SSH login (perform on all four servers)

[root@hftest0001 ~]# useradd hadoop -g root    ==> create user hadoop in the root group
[root@hftest0001 ~]# passwd hadoop             ==> set a password (also hadoop)
[root@hftest0001 ~]# su hadoop
[hadoop@hftest0001 ~]$ which ssh-keygen
/usr/bin/ssh-keygen
[hadoop@hftest0001 ~]$ ssh-keygen              ==> accept the defaults (press Enter at every prompt)

Concatenate the id_rsa.pub contents from all four servers into a single authorized_keys file, then place that file at /home/hadoop/.ssh/authorized_keys on every server.
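A minimal sketch of one way to build and distribute that file, run as hadoop from hftest0001 (the ssh/scp calls will still prompt for the hadoop password at this point, since the keys are not yet in place):

[hadoop@hftest0001 ~]$ cd ~/.ssh
[hadoop@hftest0001 .ssh]$ cat id_rsa.pub > authorized_keys
[hadoop@hftest0001 .ssh]$ ssh hftest0002 cat /home/hadoop/.ssh/id_rsa.pub >> authorized_keys
[hadoop@hftest0001 .ssh]$ ssh hftest0003 cat /home/hadoop/.ssh/id_rsa.pub >> authorized_keys
[hadoop@hftest0001 .ssh]$ ssh hftest0004 cat /home/hadoop/.ssh/id_rsa.pub >> authorized_keys
[hadoop@hftest0001 .ssh]$ for h in hftest0002 hftest0003 hftest0004; do scp authorized_keys $h:/home/hadoop/.ssh/; done
[hadoop@hftest0001 .ssh]$ chmod 700 ~/.ssh; chmod 600 ~/.ssh/authorized_keys    ==> sshd ignores keys with loose permissions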

Verify by ssh-ing between all the servers:
[hadoop@hftest0001 ~]$ ssh hftest0001
[hadoop@hftest0001 ~]$ ssh hftest0002
...
[hadoop@hftest0002 ~]$ ssh hftest0001
...

2. Install the JDK and configure environment variables
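A minimal sketch of the variables involved, assuming the JDK was unpacked to /opt/jdk1.8.0 (an example path; adjust to your actual install):

[root@hftest0001 ~]# cat >> /etc/profile <<'EOF'
export JAVA_HOME=/opt/jdk1.8.0      # example path
export PATH=$JAVA_HOME/bin:$PATH
EOF
[root@hftest0001 ~]# source /etc/profile
[root@hftest0001 ~]# java -version          ==> should print the JDK version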

3. Deploy the ZooKeeper cluster (use an odd number of nodes; I chose 3). Deploy ZooKeeper on three servers; the configuration is essentially identical on each.

[root@hftest0001 zookeeper-3.4.6]# pwd
/opt/zookeeper-3.4.6

[root@hftest0001 zookeeper-3.4.6]# mkdir data
[root@hftest0001 zookeeper-3.4.6]# cd conf

[root@hftest0001 conf]# pwd
/opt/zookeeper-3.4.6/conf

[root@hftest0001 conf]# cp -p zoo_sample.cfg zoo.cfg

Edit the configuration file:
[root@hftest0001 conf]# view zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial 
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between 
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just 
# example sakes.
dataDir=/opt/zookeeper-3.4.6/data/                        ==> data directory
# the port at which the clients will connect    
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the 
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1

server.201=${IP}:2888:3888
server.155=${IP}:2888:3888
server.154=${IP}:2888:3888            # cluster entry format: server.X=IP:port1:port2
                                      # X can be anything as long as it is unique within the cluster; port1/port2 can be
                                      # any unused ports, used for quorum communication (follower-leader traffic and leader election)
                                      

[root@hftest0001 conf]# cd ../data

[root@hftest0001 data]# pwd
/opt/zookeeper-3.4.6/data   

# Create a file named myid; its content differs per server and must match the X in zoo.cfg.
[root@hftest0001 data]# echo X > myid        ==> X differs on each server
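For example, assuming hftest0001/hftest0002/hftest0003 are the hosts behind server.201, server.155 and server.154 respectively (match each myid to that host's own server.X line):

[root@hftest0001 data]# echo 201 > myid
[root@hftest0002 data]# echo 155 > myid
[root@hftest0003 data]# echo 154 > myid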
                                                                   
Start ZooKeeper (must be started on all three servers)
[root@hftest0001 data]# cd ../bin
[root@hftest0001 bin]# pwd
/opt/zookeeper-3.4.6/bin
[root@hftest0001 bin]# ./zkServer.sh start

[root@hftest0001 bin]# jps
1911 QuorumPeerMain
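As a further sanity check, zkServer.sh status reports each node's quorum role; across the three servers, one should report leader and the other two follower (output shown is a sketch from a 3.4.6 node and may differ slightly):

[root@hftest0001 bin]# ./zkServer.sh status
JMX enabled by default
Using config: /opt/zookeeper-3.4.6/bin/../conf/zoo.cfg
Mode: follower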


4. Configure HDFS HA (required on all four servers)

[hadoop@hftest0001 hadoop-2.7.1]$ pwd
/home/hadoop/hadoop-2.7.1

[hadoop@hftest0001 hadoop-2.7.1]$ mkdir tmp/name -p
[hadoop@hftest0001 hadoop-2.7.1]$ mkdir tmp/data -p
[hadoop@hftest0001 hadoop-2.7.1]$ mkdir tmp/qjournal -p

[hadoop@hftest0001 hadoop-2.7.1]$ view etc/hadoop/masters          => sets the SecondaryNameNode host (effectively unused once HA is enabled, since the standby NameNode takes over checkpointing)
hftest0001

[hadoop@hftest0001 hadoop-2.7.1]$ cat etc/hadoop/slaves            => lists the DataNodes
hftest0002
hftest0003
hftest0004

[hadoop@hftest0001 hadoop-2.7.1]$ cat etc/hadoop/hdfs-site.xml    => this file is identical on all four servers
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
	<property>
		<name>dfs.replication</name>
		<value>2</value>                            ==> number of block replicas
	</property>
	<property>
		<name>dfs.permissions.enabled</name>
		<value>false</value>
	</property>
	<property>
		<name>dfs.datanode.data.dir.perm</name>
		<value>755</value>
	</property>
	<property>
		<name>dfs.namenode.name.dir</name>
		<value>/home/hadoop/hadoop-2.7.1/tmp/name</value>
	</property>
	<property>
		<name>dfs.datanode.data.dir</name>
		<value>/home/hadoop/hadoop-2.7.1/tmp/data</value>
	</property>

	<!-- HA  -->
	<property>
		<name>dfs.nameservices</name>
		<value>HACluster</value>                ==> logical nameservice ID; the name is arbitrary
	</property>
	<property>
		<name>dfs.ha.namenodes.HACluster</name>
		<value>nn-201,nn-155</value>            ==> NameNode IDs within the nameservice; the names are arbitrary
	</property>
	<property>
		<name>dfs.namenode.rpc-address.HACluster.nn-201</name>
		<value>hftest0001:8020</value>
	</property>
	<property>
		<name>dfs.namenode.rpc-address.HACluster.nn-155</name>
		<value>hftest0002:8020</value>
	</property>
	<property>
		<name>dfs.namenode.http-address.HACluster.nn-201</name>
		<value>hftest0001:50070</value>
	</property>
	<property>
		<name>dfs.namenode.http-address.HACluster.nn-155</name>
		<value>hftest0002:50070</value>
	</property>
	<property>                                                    ==> JournalNode quorum; use an odd number, I chose three
		<name>dfs.namenode.shared.edits.dir</name>
		<value>qjournal://hftest0001:8485;hftest0002:8485;hftest0003:8485/HACluster</value>
	</property>
	<property>
		<name>dfs.client.failover.proxy.provider.HACluster</name>
		<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
	</property>
	
	<!--
	<property>
		<name>dfs.ha.fencing.methods</name>
		<value>sshfence</value>
	</property>
	<property>
		<name>dfs.ha.fencing.ssh.private-key-files</name>
		<value>/home/hadoop/.ssh/id_rsa</value>
	</property>
        -->
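	<!-- Note: shell(/bin/true) makes fencing always report success. That is convenient
	     on a test cluster, but a production cluster should use sshfence (commented out
	     above) or a real fencing script, otherwise a split-brain active NameNode is
	     never actually cut off. -->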
	<property>
		<name>dfs.ha.fencing.methods</name>
		<value>shell(/bin/true)</value>
	</property>

	<property>
		<name>dfs.journalnode.edits.dir</name>
		<value>/home/hadoop/hadoop-2.7.1/tmp/qjournal</value>
	</property>
	<property>
		<name>dfs.ha.automatic-failover.enabled.HACluster</name>
		<value>true</value>
	</property>
	<property>
		<name>ha.zookeeper.quorum</name>
		<value>hftest0001:2181,hftest0002:2181,hftest0003:2181</value>
	</property>
<!--	HA -->
</configuration>

[hadoop@hftest0001 hadoop-2.7.1]$ cat etc/hadoop/core-site.xml 
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
	<property>
		<name>hadoop.tmp.dir</name>
		<value>/home/hadoop/hadoop-2.7.1/tmp</value>
	</property>
	<property>
		<name>fs.defaultFS</name>
		<value>hdfs://HACluster</value>
	</property>
</configuration>

Startup:
1. Format the ZK cluster (run on hftest0001)
    [hadoop@hftest0001 hadoop-2.7.1]$ ./bin/hdfs zkfc -formatZK
    ...
    ...
    Successfully created /hadoop-ha/HACluster in ZK
    ...
2. Start the JournalNode cluster (run on hftest0001, hftest0002 and hftest0003)
    [hadoop@hftest0001 hadoop-2.7.1]$ ./sbin/hadoop-daemon.sh start journalnode
    [hadoop@hftest0002 hadoop-2.7.1]$ ./sbin/hadoop-daemon.sh start journalnode
    [hadoop@hftest0003 hadoop-2.7.1]$ ./sbin/hadoop-daemon.sh start journalnode    
    
    [hadoop@hftest0001 hadoop-2.7.1]$ jps
    6503 JournalNode
    ...
    
3. Format and start the NameNodes
    Run on hftest0001:
    [hadoop@hftest0001 hadoop-2.7.1]$ ./bin/hdfs namenode -format
    [hadoop@hftest0001 hadoop-2.7.1]$ ./sbin/hadoop-daemon.sh start namenode
    
    [hadoop@hftest0001 hadoop-2.7.1]$ jps
    6503 JournalNode
    6859 NameNode
    
    Run on hftest0002 (bootstrapStandby copies the formatted metadata over from the active NameNode):
    [hadoop@hftest0002 hadoop-2.7.1]$ ./bin/hdfs namenode -bootstrapStandby
    [hadoop@hftest0002 hadoop-2.7.1]$ ./sbin/hadoop-daemon.sh start namenode
    
    [hadoop@hftest0002 hadoop-2.7.1]$ jps
    6037 DataNode
    6289 NameNode
    5787 JournalNode

4. Start the DataNodes
    Run on hftest0001 (hadoop-daemons.sh starts a DataNode on every host listed in slaves):
    [hadoop@hftest0001 hadoop-2.7.1]$ ./sbin/hadoop-daemons.sh start datanode
    
    Web UI
    hftest0001: http://hftest0001:50070/dfshealth.html#tab-overview
    hftest0002: http://hftest0002:50070/dfshealth.html#tab-overview   
    
    At this point both NameNodes are still in standby state.
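    The state can also be checked from the command line (nn-201/nn-155 are the NameNode IDs defined in hdfs-site.xml):
    [hadoop@hftest0001 hadoop-2.7.1]$ ./bin/hdfs haadmin -getServiceState nn-201
    standby
    [hadoop@hftest0001 hadoop-2.7.1]$ ./bin/hdfs haadmin -getServiceState nn-155
    standby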

5. Start ZKFC (DFSZKFailoverController); it must run on the NameNode hosts. ZooKeeper then decides which NameNode becomes active.
    Run on hftest0001:
    [hadoop@hftest0001 hadoop-2.7.1]$ ./sbin/hadoop-daemon.sh start zkfc
    
    Run on hftest0002:
    [hadoop@hftest0002 hadoop-2.7.1]$ ./sbin/hadoop-daemon.sh start zkfc
    
    [hadoop@hftest0001 hadoop-2.7.1]$ jps
    6503 JournalNode
    6859 NameNode
    6745 DFSZKFailoverController
    
    [hadoop@hftest0002 hadoop-2.7.1]$ jps
    6037 DataNode
    6163 DFSZKFailoverController
    6289 NameNode
    5787 JournalNode
 
 
6. Verify automatic failover
    Kill the active NameNode's process and check whether the standby NameNode transitions to the active state.
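    A sketch of the test, assuming nn-201 (hftest0001) currently holds the active state; the pid is just the example value from the jps output above:
    [hadoop@hftest0001 hadoop-2.7.1]$ jps | grep NameNode
    6859 NameNode
    [hadoop@hftest0001 hadoop-2.7.1]$ kill -9 6859
    [hadoop@hftest0002 hadoop-2.7.1]$ ./bin/hdfs haadmin -getServiceState nn-155
    active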


5. YARN HA (configure on all four servers)

[hadoop@hftest0001 hadoop-2.7.1]$ view etc/hadoop/mapred-site.xml
<configuration>
        <property>
                <name>mapreduce.framework.name</name>
                <value>yarn</value>
        </property>
</configuration>


[hadoop@hftest0001 hadoop-2.7.1]$ view etc/hadoop/yarn-site.xml 
<?xml version="1.0"?>
<configuration>
	<property>
		<name>yarn.resourcemanager.hostname</name>
		<value>hftest0001</value>
	</property>
	<property>
		<name>yarn.nodemanager.aux-services</name>
		<value>mapreduce_shuffle</value>
	</property>

	<property>
		<name>yarn.resourcemanager.connect.retry-interval.ms</name>
		<value>2000</value>
	</property>

	<!-- HA -->
	<property>
		<name>yarn.resourcemanager.ha.enabled</name>
		<value>true</value>
	</property>
	<property>
		<name>yarn.resourcemanager.cluster-id</name>
		<value>hfmytest-yarn</value>
	</property>							
	<property>
		<name>yarn.resourcemanager.ha.rm-ids</name>
		<value>rm-201,rm-155</value>
	</property>
	<property>
		<name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
		<value>true</value>
	</property>
	<property>
		<name>yarn.resourcemanager.hostname.rm-201</name>
		<value>hftest0001</value>
	</property>
	<property>
		<name>yarn.resourcemanager.hostname.rm-155</name>
		<value>hftest0002</value>
	</property>
	<property>
		<name>yarn.resourcemanager.webapp.address.rm-201</name>
		<value>hftest0001:8088</value>
	</property>
	<property>
		<name>yarn.resourcemanager.webapp.address.rm-155</name>
		<value>hftest0002:8088</value>
	</property>
	<property>
		<name>yarn.resourcemanager.zk-address</name>
		<value>hftest0001:2181,hftest0002:2181,hftest0003:2181</value>						
	</property>
</configuration>

Run on hftest0001:
[hadoop@hftest0001 hadoop-2.7.1]$ ./sbin/start-yarn.sh 

Run on hftest0002 (start-yarn.sh only starts the ResourceManager on the local host plus the NodeManagers, so the standby RM must be started by hand):
[hadoop@hftest0002 hadoop-2.7.1]$ ./sbin/yarn-daemon.sh start resourcemanager

Verify:
[hadoop@hftest0001 hadoop-2.7.1]$ ./bin/yarn rmadmin -getServiceState rm-201
active
[hadoop@hftest0001 hadoop-2.7.1]$ ./bin/yarn rmadmin -getServiceState rm-155
standby
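As a final end-to-end smoke test, you can run the bundled example job (the examples jar ships with Hadoop 2.7.1 under share/hadoop/mapreduce; the Pi estimate shown is what 2 maps with 10 samples each produced on my run):

[hadoop@hftest0001 hadoop-2.7.1]$ ./bin/yarn jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar pi 2 10
...
Estimated value of Pi is 3.80000000000000000000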


