1. Download Hadoop
Use the Aliyun mirror: wget http://mirrors.aliyun.com/apache/hadoop/core/stable/hadoop-2.7.2.tar.gz
2. Download the JDK
wget --no-check-certificate --no-cookies --header "Cookie: oraclelicense=accept-securebackup-cookie" http://download.oracle.com/otn-pub/java/jdk/8u65-b17/jdk-8u65-linux-x64.tar.gz
3. Download ZooKeeper
http://mirrors.aliyun.com/apache/zookeeper/zookeeper-3.4.8/zookeeper-3.4.8.tar.gz
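The three downloads above are just tarballs; a minimal extraction sketch follows. Unpacking everything under /opt is an assumption, adjust the target directory to match the paths used later (JAVA_HOME, HADOOP_HOME, the ZooKeeper dataDir).
# run on every node (or unpack once and copy with scp)
tar -xzf jdk-8u65-linux-x64.tar.gz -C /opt
tar -xzf hadoop-2.7.2.tar.gz -C /opt
tar -xzf zookeeper-3.4.8.tar.gz -C /opt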
4. Configure hosts, ssh (passwordless login), the firewall, selinux, JAVA_HOME and ntp on every node; a sketch of these settings follows.
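A minimal sketch of step 4, assuming a CentOS 7-style system; the IP addresses are placeholders and must be replaced with the real ones.
# /etc/hosts on every node (example addresses)
192.168.1.10  master
192.168.1.11  node1
192.168.1.12  node2
# passwordless ssh from master to all nodes (repeat ssh-copy-id per host)
ssh-keygen -t rsa
ssh-copy-id master; ssh-copy-id node1; ssh-copy-id node2
# firewall and selinux
systemctl stop firewalld && systemctl disable firewalld
setenforce 0    # and set SELINUX=disabled in /etc/selinux/config
# JAVA_HOME in /etc/profile (then: source /etc/profile)
export JAVA_HOME=/opt/jdk1.8.0_65
export PATH=$PATH:$JAVA_HOME/bin
# time synchronization
ntpdate pool.ntp.org    # or keep ntpd/chronyd running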
5. Configure ZooKeeper (the settings below go in conf/zoo.cfg on all three nodes)
==============================
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/hadoop/zookeeper
clientPort=2181
autopurge.snapRetainCount=30
autopurge.purgeInterval=24
server.1=master:2888:3888
server.2=node1:2888:3888
server.3=node2:2888:3888
================================
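Each server.N entry in zoo.cfg must be matched by a myid file in dataDir containing just that number; a short sketch:
# on master
mkdir -p /hadoop/zookeeper && echo 1 > /hadoop/zookeeper/myid
# on node1
mkdir -p /hadoop/zookeeper && echo 2 > /hadoop/zookeeper/myid
# on node2
mkdir -p /hadoop/zookeeper && echo 3 > /hadoop/zookeeper/myid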
6. Configure Hadoop
===================================================================================
Cluster plan:
master: NameNode, DFSZKFailoverController, ResourceManager, JournalNode, NodeManager, DataNode, QuorumPeerMain
node1:  NameNode, DFSZKFailoverController, JournalNode, NodeManager, DataNode, QuorumPeerMain
node2:  ResourceManager, JournalNode, NodeManager, DataNode, QuorumPeerMain
====================================================================================
Notes:
1. In Hadoop 2.x an HA HDFS cluster normally has two NameNodes, one active and one standby. The active NameNode serves client requests; the standby one does not, it only synchronizes the active NameNode's state so it can take over quickly if the active fails.
2. Hadoop 2.x offers two official HDFS HA solutions, NFS and QJM; here we use the simpler QJM. In this scheme the active and standby NameNodes share metadata through a group of JournalNodes, and an edit is considered written once it reaches a majority of the JournalNodes, which is why an odd number of JournalNodes is usually configured.
3. A ZooKeeper cluster is also set up for ZKFC (DFSZKFailoverController) failover: when the active NameNode goes down, the standby NameNode is automatically promoted to the active state.
4. hadoop-2.2.0 still had a single ResourceManager and therefore a single point of failure; hadoop-2.4.1 fixed this by allowing two ResourceManagers, one active and one standby, with their state coordinated through ZooKeeper.
==================================================================================================
All Hadoop 2.x configuration files live under $HADOOP_HOME/etc/hadoop
export HADOOP_HOME=/opt/hadoop-2.7.2    # point this at the directory the Hadoop tarball from step 1 was unpacked to
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
========================================================
hadoop-env.sh
export JAVA_HOME=/opt/jdk1.8.0_65
core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://ns1</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/data/hadoop</value>
  </property>
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>master:2181,node1:2181,node2:2181</value>
  </property>
</configuration>
hdfs-site.xml
<configuration>
  <property>
    <name>dfs.nameservices</name>
    <value>ns1</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.ns1</name>
    <value>nn1,nn2</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.ns1.nn1</name>
    <value>master:8020</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.ns1.nn1</name>
    <value>master:50070</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.ns1.nn2</name>
    <value>node1:8020</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.ns1.nn2</name>
    <value>node1:50070</value>
  </property>
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://master:8485;node1:8485;node2:8485/ns1</value>
  </property>
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/data/hadoop/journal</value>
  </property>
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.ns1</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>
      sshfence
      shell(/bin/true)
    </value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/root/.ssh/id_rsa</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>30000</value>
  </property>
</configuration>
mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
yarn-site.xml
<configuration>
  <!-- Site specific YARN configuration properties -->
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>cluster1</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>master</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>node2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>master:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>node2:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>master:2181,node1:2181,node2:2181</value>
  </property>
  <!-- not in the original notes, but normally required so MapReduce jobs can shuffle on YARN -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>
7. Edit slaves (the slaves file specifies the worker nodes; because HDFS and YARN are both started from master here, the slaves file on master determines where the DataNodes run and likewise where the NodeManagers run)
master
node1
node2
8. Start the ZooKeeper cluster; the commands are sketched below.
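A sketch using zkServer.sh, which ships with ZooKeeper; the install path assumes the tarball was unpacked to /opt as in the earlier sketch.
# run on master, node1 and node2
/opt/zookeeper-3.4.8/bin/zkServer.sh start
# verify: one node should report Mode: leader, the other two Mode: follower
/opt/zookeeper-3.4.8/bin/zkServer.sh status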
9. Start the JournalNodes (run on master, node1 and node2):
hadoop-daemon.sh start journalnode
Verify with jps: master, node1 and node2 should each now show a JournalNode process.
10. Format HDFS
Run on master: hdfs namenode -format
# Formatting creates the metadata directory under hadoop.tmp.dir from core-site.xml (/data/hadoop here); copy that directory to the same path on node1 so the standby NameNode (nn2) starts from the same namespace.
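Instead of copying the directory by hand, the standby NameNode can be initialized with the built-in bootstrap command; this assumes the JournalNodes are running and the freshly formatted NameNode on master has been started first.
# on master: start the formatted NameNode
hadoop-daemon.sh start namenode
# on node1: pull the namespace from the active NameNode
hdfs namenode -bootstrapStandby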
11. Format ZK (run on master only)
hdfs zkfc -formatZK
12. Start HDFS (run on master)
sbin/start-dfs.sh
13. Start YARN (#####NOTE#####: run start-yarn.sh on master. The NameNode and ResourceManager are kept on separate machines for performance reasons, since both consume a lot of resources; because they are separated, they have to be started individually on their respective machines.)
sbin/start-yarn.sh
Note: the ResourceManager on node2 must be started manually: yarn-daemon.sh start resourcemanager
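A quick verification sketch once everything is up; the service IDs follow the hdfs-site.xml and yarn-site.xml settings above.
# processes per node (compare against the cluster plan in step 6)
jps
# HA state of the two NameNodes
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2
# HA state of the two ResourceManagers
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2
# web UIs: http://master:50070  http://node1:50070  http://master:8088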