Versions used in this tutorial:
hadoop-2.8.1.tar.gz
zookeeper-3.4.10.tar.gz
Linux: Centos 7 x64 (CentOS-7-x86_64-DVD-1708)
Cluster plan:
Hostname | IP | Installed software | Running processes |
---|---|---|---|
node1 | 192.168.66.3 | jdk、hadoop | NameNode、DFSZKFailoverController(zkfc) |
node2 | 192.168.66.4 | jdk、hadoop | NameNode、DFSZKFailoverController(zkfc) |
node3 | 192.168.66.5 | jdk、hadoop | ResourceManager |
node4 | 192.168.66.6 | jdk、hadoop | ResourceManager |
node5 | 192.168.66.7 | jdk、hadoop、zookeeper | DataNode、NodeManager、JournalNode、QuorumPeerMain |
node6 | 192.168.66.8 | jdk、hadoop、zookeeper | DataNode、NodeManager、JournalNode、QuorumPeerMain |
node7 | 192.168.66.9 | jdk、hadoop、zookeeper | DataNode、NodeManager、JournalNode、QuorumPeerMain |
Run the following on every node, substituting each node's own hostname (node1 shown here):
$hostnamectl set-hostname node1
$cd /etc/sysconfig/network-scripts
Edit ifcfg-ens33 (the file name matches the NIC name, which is ens33 on this install) as follows:
```
TYPE="Ethernet"
PROXY_METHOD="none"
BROWSER_ONLY="no"
BOOTPROTO="static"
DEFROUTE="yes"
IPADDR=192.168.66.3 # static IP (use each node's own address)
GATEWAY=192.168.66.2 # default gateway
NETMASK=255.255.255.0 # netmask
IPV4_FAILURE_FATAL="no"
IPV6INIT="yes"
IPV6_AUTOCONF="yes"
IPV6_DEFROUTE="yes"
IPV6_FAILURE_FATAL="no"
IPV6_ADDR_GEN_MODE="stable-privacy"
NAME="ens33"
UUID="1a7840e2-4b8c-4770-9537-c1aa9ddf42e2" # machine-specific; keep the existing value
DEVICE="ens33"
ONBOOT="yes"
```
Restart the network to apply the changes:
service network restart
Make every node resolvable by name (the same /etc/hosts content goes on all seven nodes):
$vi /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.66.3 node1
192.168.66.4 node2
192.168.66.5 node3
192.168.66.6 node4
192.168.66.7 node5
192.168.66.8 node6
192.168.66.9 node7
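The hosts file must be identical on all seven nodes. A minimal sketch for pushing it out from node1, assuming root SSH access to the other nodes (passwords are prompted until the key setup below is done):
```
# Push the finished /etc/hosts from node1 to the other six nodes.
for h in node2 node3 node4 node5 node6 node7; do
  scp /etc/hosts root@${h}:/etc/hosts
done
```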
systemctl stop firewalld.service # stop the firewall
systemctl disable firewalld.service # keep the firewall from starting at boot
Set up passwordless SSH. First, configure passwordless login from node1 to node2 through node7.
Generate a key pair on node1:
ssh-keygen -t rsa
Copy the public key to every node, including node1 itself:
ssh-copy-id node1
ssh-copy-id node2
ssh-copy-id node3
ssh-copy-id node4
ssh-copy-id node5
ssh-copy-id node6
ssh-copy-id node7
Configure passwordless login from node3 to node4 through node7.
Generate a key pair on node3:
ssh-keygen -t rsa
Copy the public key to the other nodes:
ssh-copy-id node4
ssh-copy-id node5
ssh-copy-id node6
ssh-copy-id node7
Note: the two NameNodes must be able to SSH to each other without a password, so don't forget node2 → node1.
Generate a key pair on node2:
ssh-keygen -t rsa
ssh-copy-id node1
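To confirm the key setup, run the loop below on node1: every hop should print the remote hostname without asking for a password (run the analogous check from node2 and node3 for their targets):
```
# Each ssh should return immediately with the remote hostname, no password prompt.
for h in node1 node2 node3 node4 node5 node6 node7; do
  ssh ${h} hostname
done
```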
Install the JDK (jdk-8u152-linux-x64.tar.gz, Linux x64). Unpack it to /home/ning/app:
tar -zxvf jdk-8u152-linux-x64.tar.gz -C /home/ning/app/
Then add the environment variables:
$vi /etc/profile
JAVA_HOME=/home/ning/app/jdk1.8.0_152
JRE_HOME=$JAVA_HOME/jre
HADOOP_HOME=/home/ning/app/hadoop-2.8.1
PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin
CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export JAVA_HOME
export JRE_HOME
export HADOOP_HOME
export PATH
export CLASSPATH
(Note that HADOOP_HOME must be assigned before the PATH line that references it.)
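Reload the profile and sanity-check the variables:
```
source /etc/profile
java -version         # should report 1.8.0_152
echo $HADOOP_HOME     # should print /home/ning/app/hadoop-2.8.1
```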
Install and configure the ZooKeeper cluster (on node5).
1. Unpack:
tar -zxvf zookeeper-3.4.10.tar.gz -C /home/hadoop/app/
2. Edit the configuration:
```
cd /home/hadoop/app/zookeeper-3.4.10/conf/
cp zoo_sample.cfg zoo.cfg
vi zoo.cfg
```
Change: dataDir=/home/hadoop/app/zookeeper-3.4.10/tmp
Append at the end of the file:
server.1=node5:2888:3888
server.2=node6:2888:3888
server.3=node7:2888:3888
3. Save and exit.
4. Create the tmp directory and write this node's myid:
mkdir /home/hadoop/app/zookeeper-3.4.10/tmp
echo 1 > /home/hadoop/app/zookeeper-3.4.10/tmp/myid
5. Copy the configured ZooKeeper to the other ZooKeeper nodes (create /home/hadoop/app on node6 and node7 first if it does not exist):
scp -r /home/hadoop/app/zookeeper-3.4.10/ node6:/home/hadoop/app/
scp -r /home/hadoop/app/zookeeper-3.4.10/ node7:/home/hadoop/app/
Note: change /home/hadoop/app/zookeeper-3.4.10/tmp/myid on node6 and node7 to match their server IDs in zoo.cfg.
On node6:
echo 2 > /home/hadoop/app/zookeeper-3.4.10/tmp/myid
On node7:
echo 3 > /home/hadoop/app/zookeeper-3.4.10/tmp/myid
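If node5 can also reach node6 and node7 over SSH without a password (only node1 and node3 were set up above, so this is an extra assumption), step 5's copy and the myid fix can be scripted in one loop:
```
# Copy the install and write each node's myid from node5.
i=2
for h in node6 node7; do
  scp -r /home/hadoop/app/zookeeper-3.4.10/ ${h}:/home/hadoop/app/
  ssh ${h} "echo ${i} > /home/hadoop/app/zookeeper-3.4.10/tmp/myid"
  i=$((i+1))
done
```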
Unpack Hadoop:
tar -zxvf hadoop-2.8.1.tar.gz -C /home/ning/app/
Configure HDFS.
(In Hadoop 2.x, all configuration files live under $HADOOP_HOME/etc/hadoop.)
HADOOP_HOME and PATH were already added to /etc/profile above, so no further environment changes are needed here.
Edit hadoop-env.sh:
export JAVA_HOME=/home/ning/app/jdk1.8.0_152
Edit core-site.xml:
```
<configuration>
    <!-- Logical name of the HDFS nameservice -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://ns1/</value>
    </property>
    <!-- Base directory for Hadoop temporary files -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/home/ning/app/allAppFiles/hadoop/tmpDir/</value>
    </property>
    <!-- ZooKeeper quorum used for HA coordination -->
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>node5:2181,node6:2181,node7:2181</value>
    </property>
</configuration>
```
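To confirm the file parses and the value is picked up (this only reads the config, so no daemons need to be running):
```
hdfs getconf -confKey fs.defaultFS   # expect: hdfs://ns1/
```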
Edit hdfs-site.xml:
```
<configuration>
    <!-- Nameservice ID; must match fs.defaultFS -->
    <property>
        <name>dfs.nameservices</name>
        <value>ns1</value>
    </property>
    <!-- The two NameNodes under ns1 -->
    <property>
        <name>dfs.ha.namenodes.ns1</name>
        <value>nn1,nn2</value>
    </property>
    <!-- RPC and HTTP addresses of nn1 (node1) -->
    <property>
        <name>dfs.namenode.rpc-address.ns1.nn1</name>
        <value>node1:9000</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.ns1.nn1</name>
        <value>node1:50070</value>
    </property>
    <!-- RPC and HTTP addresses of nn2 (node2) -->
    <property>
        <name>dfs.namenode.rpc-address.ns1.nn2</name>
        <value>node2:9000</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.ns1.nn2</name>
        <value>node2:50070</value>
    </property>
    <!-- Where the NameNodes read/write the shared edit log (the JournalNodes) -->
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://node5:8485;node6:8485;node7:8485/ns1</value>
    </property>
    <!-- Local directory where each JournalNode stores its data -->
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/home/ning/app/allAppFiles/journalData</value>
    </property>
    <!-- Enable automatic failover -->
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <!-- How clients locate the active NameNode -->
    <property>
        <name>dfs.client.failover.proxy.provider.ns1</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <!-- Fencing: try sshfence first, fall back to a no-op shell so failover never hangs -->
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>
            sshfence
            shell(/bin/true)
        </value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/home/ning/.ssh/id_rsa</value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>
</configuration>
```
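Likewise for hdfs-site.xml; this should print both NameNode hosts:
```
hdfs getconf -namenodes   # expect: node1 node2
```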
Edit mapred-site.xml (copy it from mapred-site.xml.template if it does not exist yet):
```
<configuration>
    <!-- Run MapReduce jobs on YARN -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
```
Edit yarn-site.xml:
```
<configuration>
    <!-- Enable ResourceManager HA -->
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    <!-- Cluster ID -->
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>yrc</value>
    </property>
    <!-- IDs of the two ResourceManagers -->
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>node3</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>node4</value>
    </property>
    <!-- ZooKeeper quorum for RM state and leader election -->
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>node5:2181,node6:2181,node7:2181</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>
```
Edit the slaves file. Because HDFS is started on node1 and YARN on node3, the slaves file on node1 determines where the DataNodes run, while the slaves file on node3 determines where the NodeManagers run. Both files contain (one way to write it is sketched after the list):
node5
node6
node7
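One way to write the file, assuming $HADOOP_HOME is set as in /etc/profile above (run on node1 and node3):
```
# Write the three worker hostnames into the slaves file.
cat > $HADOOP_HOME/etc/hadoop/slaves <<EOF
node5
node6
node7
EOF
```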
Start the ZooKeeper cluster (on node5, node6, and node7):
cd /home/hadoop/app/zookeeper-3.4.10/bin/
./zkServer.sh start
Check the status; there should be one leader and two followers:
./zkServer.sh status
Start the JournalNodes (on node5, node6, and node7):
cd /home/ning/app/hadoop-2.8.1
sbin/hadoop-daemon.sh start journalnode
Run jps to verify: node5, node6, and node7 should each show a JournalNode process.
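Optionally, the check can be run from node1 in one loop; this assumes passwordless SSH and that jps is on the remote non-interactive PATH (it may not be if Java is only configured in /etc/profile):
```
for h in node5 node6 node7; do
  echo "== ${h} =="
  ssh ${h} jps    # each should list JournalNode (and QuorumPeerMain)
done
```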
Format HDFS.
On node1, run:
hdfs namenode -format
Formatting generates files under the directory set by hadoop.tmp.dir in core-site.xml (here /home/ning/app/allAppFiles/hadoop/tmpDir/). Copy that directory to node2 so the standby NameNode starts from the same metadata:
scp -r /home/ning/app/allAppFiles/hadoop/tmpDir/ node2:/home/ning/app/allAppFiles/hadoop/
Alternatively (recommended), run `hdfs namenode -bootstrapStandby` on node2 instead of copying by hand.
Format ZKFC (run once, on node1):
hdfs zkfc -formatZK
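Optionally verify from ZooKeeper that the HA znode was created (zkCli.sh accepts a single command and exits):
```
/home/hadoop/app/zookeeper-3.4.10/bin/zkCli.sh -server node5:2181 ls /hadoop-ha
# expect: [ns1]
```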
Start HDFS (on node1):
sbin/start-dfs.sh
Start YARN.
(##### Note #####: run start-yarn.sh on node3. The NameNode and ResourceManager are placed on separate machines for performance reasons; both consume a lot of resources, and since they live on different hosts they must be started on their respective machines.)
sbin/start-yarn.sh
On node4, start the second ResourceManager manually:
yarn-daemon.sh start resourcemanager
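Once both ResourceManagers are up, their HA states can be queried (rm1/rm2 are the IDs from yarn-site.xml):
```
yarn rmadmin -getServiceState rm1   # expect: active
yarn rmadmin -getServiceState rm2   # expect: standby
```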
Access via browser:
http://node1:50070
NameNode 'node1:9000' (active)
http://node2:50070
NameNode 'node2:9000' (standby)
http://node3:8088 (ResourceManager UI; requests to the standby at http://node4:8088 are redirected to the active RM)
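A simple failover check, run on node1; <pid> below is a placeholder for the NameNode process ID that jps prints:
```
jps                                 # note the NameNode pid
kill -9 <pid>                       # simulate a crash of the active NameNode
hdfs haadmin -getServiceState nn2   # shortly after, expect: active
```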