Hadoop high-availability (HA) cluster setup

#Disable the firewall
systemctl disable firewalld
systemctl stop firewalld

#Disable SELinux: in /etc/selinux/config change SELINUX=enforcing to SELINUX=disabled
sed -i "s/SELINUX=enforcing/SELINUX=disabled/g" /etc/selinux/config
setenforce 0
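
#Optional sanity check: getenforce should now report Permissive (or Disabled after a reboot)
getenforce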

#Create the group and user
groupadd -g 1000 rdato
useradd  -u 1001 -g rdato rdato
mkdir -p /u01/
chown -R rdato:rdato /u01/

#Configure environment variables for the rdato user
cat >> /home/rdato/.bash_profile << EOF
JAVA_HOME=/usr/java/jdk1.8.0_131; export JAVA_HOME
SCALA_HOME=/usr/share/scala; export SCALA_HOME
HADOOP_HOME=/u01/hadoop; export HADOOP_HOME
HIVE_HOME=/u01/hive/; export HIVE_HOME
HBASE_HOME=/u01/hbase; export HBASE_HOME
SPARK_HOME=/u01/spark; export SPARK_HOME
PATH=/usr/java/jdk1.8.0_131/bin:/usr/share/scala/bin:/u01/hadoop/bin:/u01/hive/bin:/u01/hbase/bin:/u01/spark/bin:\$PATH; export PATH
EOF
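
#Optional sanity check (meaningful once the JDK and Hadoop packages below are installed); a minimal sketch run as rdato:
su - rdato
echo $JAVA_HOME $HADOOP_HOME
java -version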

#Configure /etc/hosts
cat >> /etc/hosts << EOF
192.168.121.161   zkp1
192.168.121.162   zkp2
192.168.121.163   zkp3
192.168.121.164   nn1rm2
192.168.121.165   nn2rm1
192.168.121.166   datan1
192.168.121.167   datan2
192.168.121.168   datan3
EOF

#Set the hostname on each node (replace XXXX with its name from /etc/hosts)
hostnamectl set-hostname  XXXX

#Set the password for the rdato user
passwd rdato

#Set up SSH mutual trust (generate a key pair as rdato on every node)
su - rdato
ssh-keygen -t rsa


#Merge the public keys of all nodes into authorized_keys on one server (only the two NameNode hosts are shown here; see the sketch below for all nodes)
cd .ssh
ssh rdato@nn1rm2 cat /home/rdato/.ssh/id_rsa.pub>> authorized_keys
ssh rdato@nn2rm1 cat /home/rdato/.ssh/id_rsa.pub>> authorized_keys

#Distribute the merged authorized_keys to the other nodes
scp -r /home/rdato/.ssh/authorized_keys rdato@nn1rm2:/home/rdato/.ssh/
scp -r /home/rdato/.ssh/authorized_keys rdato@nn2rm1:/home/rdato/.ssh/ 
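
#The four commands above only cover the two NameNode hosts; a minimal sketch that gathers every node's key
#and redistributes the merged file, assuming all eight hosts from /etc/hosts and interactive password entry:
for h in zkp1 zkp2 zkp3 nn1rm2 nn2rm1 datan1 datan2 datan3; do
    ssh rdato@$h cat /home/rdato/.ssh/id_rsa.pub >> /home/rdato/.ssh/authorized_keys
done
for h in zkp1 zkp2 zkp3 nn1rm2 nn2rm1 datan1 datan2 datan3; do
    scp /home/rdato/.ssh/authorized_keys rdato@$h:/home/rdato/.ssh/
done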

#Fix the permissions
chmod 600 /home/rdato/.ssh/authorized_keys

#Verify passwordless connectivity (replace XXXX with each hostname)
ssh rdato@XXXX date
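
#A quick loop to check passwordless SSH to every node in one pass (hostnames as defined in /etc/hosts above):
for h in zkp1 zkp2 zkp3 nn1rm2 nn2rm1 datan1 datan2 datan3; do
    ssh rdato@$h date
done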


#########################################Install the JDK
yum install -y jdk-8u131-linux-x64.rpm

#########################################Install Scala
yum install -y scala-2.12.6.rpm

#########################################Install ZooKeeper
su - rdato
cd /u01
tar -zxvf zookeeper-3.4.9.tar.gz
mv zookeeper-3.4.9 /u01/zookeeper
cp /u01/zookeeper/conf/zoo_sample.cfg /u01/zookeeper/conf/zoo.cfg

#Create the dataDir directory
mkdir -p /u01/zookeeper/data/

#Set the dataDir parameter in zoo.cfg and add the quorum servers (appended below)
cat >> /u01/zookeeper/conf/zoo.cfg << EOF
dataDir=/u01/zookeeper/data
server.1=zkp1:42888:43888
server.2=zkp2:42888:43888
server.3=zkp3:42888:43888
EOF

#Set the myid on each node (1 on zkp1, 2 on zkp2, 3 on zkp3):
echo 1 > /u01/zookeeper/data/myid
echo 2 > /u01/zookeeper/data/myid
echo 3 > /u01/zookeeper/data/myid

#Start ZooKeeper on each node
/u01/zookeeper/bin/zkServer.sh start
#Once all nodes are up, check the ZooKeeper status
/u01/zookeeper/bin/zkServer.sh status
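
#Optional: query all three ZooKeeper nodes from one host; one should report "leader" and the others "follower"
for h in zkp1 zkp2 zkp3; do
    ssh rdato@$h /u01/zookeeper/bin/zkServer.sh status
done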


#########################################Install Hadoop
cd /u01
tar -zxvf hadoop-2.8.3.tar.gz
mv hadoop-2.8.3 hadoop

#############################################################Configure on a single node#######then scp the files to all other nodes
#Configure /u01/hadoop/etc/hadoop/hadoop-env.sh and yarn-env.sh
cat >> /u01/hadoop/etc/hadoop/hadoop-env.sh << EOF
export JAVA_HOME=/usr/java/jdk1.8.0_131
EOF

cat >> /u01/hadoop/etc/hadoop/yarn-env.sh << EOF
export JAVA_HOME=/usr/java/jdk1.8.0_131
EOF

#Edit /u01/hadoop/etc/hadoop/core-site.xml and add the following inside <configuration>:


<property>
    <name>fs.trash.interval</name>
    <value>1440</value>
    <description>Deleted HDFS files go to the trash first; the trash keeps them for at most one day</description>
</property>
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://sparkcluster</value>
    <description>NameNode access URI for the HA deployment; must match dfs.nameservices in hdfs-site.xml</description>
</property>
<property>
    <name>hadoop.tmp.dir</name>
    <value>/u01/hadoop/tmp</value>
    <description>Hadoop temporary directory</description>
</property>
<property>
    <name>ha.zookeeper.quorum</name>
    <value>zkp1:2181,zkp2:2181,zkp3:2181</value>
    <description>ZooKeeper quorum addresses</description>
</property>

#Edit /u01/hadoop/etc/hadoop/hdfs-site.xml and add the following inside <configuration>:


<property>
    <name>dfs.nameservices</name>
    <value>sparkcluster</value>
    <description>The HDFS nameservice is sparkcluster; must match the value used in core-site.xml</description>
</property>
<property>
    <name>dfs.ha.namenodes.sparkcluster</name>
    <value>nn1,nn2</value>
    <description>sparkcluster has two NameNodes: nn1 and nn2</description>
</property>
<property>
    <name>dfs.namenode.rpc-address.sparkcluster.nn1</name>
    <value>nn1rm2:9000</value>
    <description>RPC address of nn1</description>
</property>
<property>
    <name>dfs.namenode.http-address.sparkcluster.nn1</name>
    <value>nn1rm2:50070</value>
    <description>HTTP address of nn1</description>
</property>
<property>
    <name>dfs.namenode.rpc-address.sparkcluster.nn2</name>
    <value>nn2rm1:9000</value>
    <description>RPC address of nn2</description>
</property>
<property>
    <name>dfs.namenode.http-address.sparkcluster.nn2</name>
    <value>nn2rm1:50070</value>
    <description>HTTP address of nn2</description>
</property>
<property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://datan1:8485;datan2:8485;datan3:8485/sparkcluster</value>
    <description>Where the NameNode metadata (edits) is stored on the JournalNodes</description>
</property>
<property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/u01/hadoop/journal</value>
    <description>Local directory where each JournalNode keeps its data</description>
</property>
<property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
    <description>Enable automatic NameNode failover</description>
</property>
<property>
    <name>dfs.client.failover.proxy.provider.sparkcluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    <description>Client-side failover proxy provider</description>
</property>
<property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
    <description>Fencing method; multiple methods are separated by newlines, one per line</description>
</property>
<property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/rdato/.ssh/id_rsa</value>
    <description>sshfence requires passwordless SSH; path to the private key</description>
</property>
<property>
    <name>dfs.replication</name>
    <value>3</value>
    <description>Number of data replicas</description>
</property>
<property>
    <name>ha.zookeeper.quorum</name>
    <value>zkp1:2181,zkp2:2181,zkp3:2181</value>
    <description>ZooKeeper quorum addresses</description>
</property>

#Copy the template
cp /u01/hadoop/etc/hadoop/mapred-site.xml.template /u01/hadoop/etc/hadoop/mapred-site.xml

#Edit /u01/hadoop/etc/hadoop/mapred-site.xml and add the following inside <configuration>:


<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
    <description>Run MapReduce on YARN</description>
</property>

#Edit /u01/hadoop/etc/hadoop/yarn-site.xml and add the following inside <configuration>:


<property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>zkp1:2181,zkp2:2181,zkp3:2181</value>
    <description>ZooKeeper connection addresses</description>
</property>
<property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>SparkCluster</value>
    <description>Cluster id of the RM</description>
</property>
<property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
    <description>Enable ResourceManager HA</description>
</property>
<property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
    <description>Logical IDs of the RMs</description>
</property>
<property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>nn2rm1</value>
    <description>Host of rm1</description>
</property>
<property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>nn2rm1:8088</value>
    <description>Web UI address of rm1</description>
</property>
<property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>nn1rm2</value>
    <description>Host of rm2</description>
</property>
<property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>nn1rm2:8088</value>
    <description>Web UI address of rm2</description>
</property>
<property>
    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
    <value>true</value>
    <description>Automatic RM failover</description>
</property>
<property>
    <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
    <value>true</value>
    <description>Use the embedded elector for automatic RM failover</description>
</property>
<property>
    <name>yarn.resourcemanager.ha.automatic-failover.zk-base-path</name>
    <value>/yarn-leader-election</value>
    <description>ZooKeeper base path for RM leader election</description>
</property>
<property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
    <description>Enable RM recovery</description>
</property>
<property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    <description>RM state store used for recovery</description>
</property>
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
    <description>NodeManager auxiliary service</description>
</property>
<property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    <description>NodeManager shuffle handler class</description>
</property>
<property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
    <description>Enable job log aggregation</description>
</property>
<property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>86400</value>
    <description>Log retention time, in seconds</description>
</property>

#Configure /u01/hadoop/etc/hadoop/slaves  (note: in Hadoop 3.0+ the file is /u01/hadoop/etc/hadoop/workers)
cat > /u01/hadoop/etc/hadoop/slaves << EOF
datan1
datan2
datan3
EOF


#Copy the configuration files to the other nodes
scp /u01/hadoop/etc/hadoop/hadoop-env.sh     rdato@nn2rm1:/u01/hadoop/etc/hadoop/
scp /u01/hadoop/etc/hadoop/yarn-env.sh       rdato@nn2rm1:/u01/hadoop/etc/hadoop/
scp /u01/hadoop/etc/hadoop/core-site.xml     rdato@nn2rm1:/u01/hadoop/etc/hadoop/
scp /u01/hadoop/etc/hadoop/hdfs-site.xml     rdato@nn2rm1:/u01/hadoop/etc/hadoop/
scp /u01/hadoop/etc/hadoop/mapred-site.xml   rdato@nn2rm1:/u01/hadoop/etc/hadoop/
scp /u01/hadoop/etc/hadoop/yarn-site.xml     rdato@nn2rm1:/u01/hadoop/etc/hadoop/
scp /u01/hadoop/etc/hadoop/slaves            rdato@nn2rm1:/u01/hadoop/etc/hadoop/
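
#The scp lines above only reach nn2rm1; a hedged sketch that pushes the same files to the DataNodes as well,
#assuming the Hadoop tarball has already been unpacked to /u01/hadoop on every node:
for h in nn2rm1 datan1 datan2 datan3; do
    scp /u01/hadoop/etc/hadoop/{hadoop-env.sh,yarn-env.sh,core-site.xml,hdfs-site.xml,mapred-site.xml,yarn-site.xml,slaves} \
        rdato@$h:/u01/hadoop/etc/hadoop/
done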


################################The formatting steps below are needed only for the first startup
#On the first NameNode, format the hadoop-ha znode in ZooKeeper
/u01/hadoop/bin/hdfs zkfc -formatZK

#Start the DFSZKFailoverController on every NameNode
/u01/hadoop/sbin/hadoop-daemon.sh start zkfc 

#Start the JournalNodes from any one server (hadoop-daemons.sh starts them on every host listed in slaves)
/u01/hadoop/sbin/hadoop-daemons.sh start journalnode

#Format the first NameNode and start it
/u01/hadoop/bin/hdfs namenode -format
/u01/hadoop/sbin/hadoop-daemon.sh start namenode

#On the second NameNode, bootstrap from the first (sync the metadata) and start it
/u01/hadoop/bin/hdfs namenode -bootstrapStandby
/u01/hadoop/sbin/hadoop-daemon.sh start namenode

#Start HDFS from any node
/u01/hadoop/sbin/start-dfs.sh
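
#Optional sanity check: run jps on each node; with the role layout above you would expect roughly
#  nn1rm2 / nn2rm1 : NameNode, DFSZKFailoverController
#  datan1-3        : DataNode, JournalNode
#  zkp1-3          : QuorumPeerMain (ZooKeeper)
jps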


#Check the NameNode state
hdfs haadmin -getServiceState nn1
#If both are standby, manually transition one NameNode to active
hdfs haadmin -transitionToActive --forcemanual nn1
#Fail over the NameNode from nn1 to nn2
hdfs haadmin -failover nn1 nn2
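
#Confirm the result: one NameNode should now report "active" and the other "standby"
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2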


#On nn2rm1, start ResourceManager rm1
/u01/hadoop/sbin/yarn-daemon.sh start resourcemanager

#On nn1rm2, start YARN (this starts rm2 and the NodeManagers)
/u01/hadoop/sbin/start-yarn.sh

#Check the cluster status
hdfs dfsadmin -report
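
#The ResourceManager HA state can be checked the same way (rm1/rm2 are the IDs from yarn-site.xml)
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2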


#Start the JobHistory server
/u01/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver


#Web access
#Hadoop cluster management UI (YARN ResourceManager)
http://192.168.121.165:8088/
#HDFS management UI (NameNode)
http://192.168.121.164:50070/


One startup ran into the following problem:
ERROR org.apache.hadoop.hdfs.server.datanode.DataNode: Initialization failed for Block pool (Datanode Uuid 7b189d18-eb27-4bd9-80eb-218717f10793) service to nn1rm2/192.168.121.161:9000. Exiting.
After the first HDFS format, Hadoop had been started and used, and then the format command (hdfs namenode -format) was run again.
At that point the NameNode's clusterID is regenerated while the DataNodes' clusterID stays unchanged.
The fix is to take the clusterID from ./current/VERSION under the name directory and use it to replace the clusterID in ./current/VERSION under the data directory, as sketched below.
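
#A hedged sketch of the fix, assuming the default name/data directories under hadoop.tmp.dir (/u01/hadoop/tmp);
#adjust the paths if dfs.namenode.name.dir / dfs.datanode.data.dir are set explicitly
#on a NameNode, read the current clusterID
grep clusterID /u01/hadoop/tmp/dfs/name/current/VERSION
#on each DataNode, set clusterID in VERSION to that value, then restart the DataNode
vi /u01/hadoop/tmp/dfs/data/current/VERSION
/u01/hadoop/sbin/hadoop-daemon.sh start datanode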
