Hadoop 3.3.6 HA Distributed Installation

  1. Install the Java environment

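    The JDK install itself is not spelled out in the original steps. A minimal sketch follows; the tarball name jdk-8u241-linux-x64.tar.gz is an assumption, only the target path /export/server/jdk1.8.0_241 is taken from hadoop-env.sh in step 5:

    cd /export/software
    tar -zxvf jdk-8u241-linux-x64.tar.gz -C ../server/
    # Expose the JDK system-wide; JAVA_HOME matches hadoop-env.sh in step 5
    echo 'export JAVA_HOME=/export/server/jdk1.8.0_241' >> /etc/profile
    echo 'export PATH=$JAVA_HOME/bin:$PATH' >> /etc/profile
    source /etc/profile
    java -version
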
  2. Install the ZooKeeper environment

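    ZooKeeper setup is assumed to be done separately. A rough sketch for a three-node quorum matching the node1/node2/node3:2181 addresses used later (the version and paths here are illustrative, not from the original):

    cd /export/software
    tar -zxvf apache-zookeeper-3.7.1-bin.tar.gz -C ../server/
    cd ../server/apache-zookeeper-3.7.1-bin/conf
    cp zoo_sample.cfg zoo.cfg
    # In zoo.cfg, set a persistent dataDir and list the three-node quorum, e.g.:
    #   dataDir=/export/server/zkdata
    #   server.1=node1:2888:3888
    #   server.2=node2:2888:3888
    #   server.3=node3:2888:3888
    # Then, on each node, write its own id into dataDir/myid (1 on node1, 2 on node2, 3 on node3)
    mkdir -p /export/server/zkdata && echo 1 > /export/server/zkdata/myid
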
  3. Build Hadoop from source

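    The checknative output in step 6 points at a locally built distribution under hadoop-3.3.6-src/hadoop-dist/target. As a sketch, assuming Maven, protobuf, and the native toolchain are already installed, the build is roughly:

    cd /export/server/hadoop-3.3.6-src
    # Build the binary distribution with native libraries; tests skipped to save time
    mvn clean package -Pdist,native -DskipTests -Dtar
    # The resulting distribution appears under hadoop-dist/target/
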
  4. Install Hadoop

    cd /export/software
    tar -zxvf hadoop-3.3.6.tar.gz -C ../server/
    cd ../server/hadoop-3.3.6/
    
  5. Configure hadoop-env.sh

    export JAVA_HOME=/export/server/jdk1.8.0_241
    export HADOOP_PID_DIR=/export/server/hadoop-3.3.6/hadoop_pid_dir_tmp
    export HDFS_NAMENODE_USER=root
    export HDFS_DATANODE_USER=root
    export HDFS_SECONDARYNAMENODE_USER=root
    export YARN_RESOURCEMANAGER_USER=root
    export YARN_NODEMANAGER_USER=root
    export HDFS_JOURNALNODE_USER=root
    export HDFS_ZKFC_USER=root
    
    
  6. Check Hadoop's native library dependencies in the new environment

    [root@node00 bin]# ./hadoop checknative
    2023-08-29 04:52:39,162 INFO bzip2.Bzip2Factory: Successfully loaded & initialized native-bzip2 library system-native
    2023-08-29 04:52:39,164 INFO zlib.ZlibFactory: Successfully loaded & initialized native-zlib library
    2023-08-29 04:52:39,198 INFO nativeio.NativeIO: The native code was built with PMDK support, and PMDK libs were loaded successfully.
    Native library checking:
    hadoop:  true /export/server/hadoop-3.3.6-src/hadoop-dist/target/hadoop-3.3.6/lib/native/libhadoop.so.1.0.0
    zlib:    true /lib64/libz.so.1
    zstd  :  true /lib64/libzstd.so.1
    bzip2:   true /lib64/libbz2.so.1
    openssl: true /lib64/libcrypto.so
    ISA-L:   true /lib/libisal.so.2
    PMDK:    true /usr/local/lib64/libpmem.so.1.0.0
    
    # If any of these show false, install the missing library on all three machines first (see the sketch below)
    
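    Most of the libraries reported above come from system packages. A hedged example for a CentOS/RHEL-style system (the package names are assumptions and vary by distro); ISA-L and PMDK usually have to be built and installed separately if checknative reports them as false and you actually need them:

    # Run on all three machines
    yum install -y zlib bzip2-libs openssl-libs libzstd snappy
    # Re-check afterwards
    hadoop checknative
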
  7. Configure core-site.xml

    
    <configuration>

        <!-- Default filesystem: the HDFS nameservice defined in hdfs-site.xml -->
        <property>
            <name>fs.defaultFS</name>
            <value>hdfs://nns</value>
        </property>

        <!-- Base directory for Hadoop temporary files -->
        <property>
            <name>hadoop.tmp.dir</name>
            <value>/export/server/hadoop-3.3.6/hadoopDatas/tempDatas</value>
        </property>

        <!-- ZooKeeper quorum used for automatic failover -->
        <property>
            <name>ha.zookeeper.quorum</name>
            <value>node1:2181,node2:2181,node3:2181</value>
        </property>

        <!-- I/O buffer size in bytes -->
        <property>
            <name>io.file.buffer.size</name>
            <value>4096</value>
        </property>

        <!-- Trash retention in minutes (10080 = 7 days) -->
        <property>
            <name>fs.trash.interval</name>
            <value>10080</value>
        </property>

        <!-- User shown when browsing files through the HDFS web UI -->
        <property>
            <name>hadoop.http.staticuser.user</name>
            <value>root</value>
        </property>

        <!-- Allow the root proxy user from any host, group, and user -->
        <property>
            <name>hadoop.proxyuser.root.hosts</name>
            <value>*</value>
        </property>

        <property>
            <name>hadoop.proxyuser.root.groups</name>
            <value>*</value>
        </property>

        <property>
            <name>hadoop.proxyuser.root.users</name>
            <value>*</value>
        </property>

    </configuration>
    
    
  8. hdfs-site.xml

    <configuration>

        <!-- Logical name of the HDFS nameservice -->
        <property>
            <name>dfs.nameservices</name>
            <value>nns</value>
        </property>

        <!-- The two NameNodes in the nameservice -->
        <property>
            <name>dfs.ha.namenodes.nns</name>
            <value>nn1,nn2</value>
        </property>

        <!-- RPC addresses of the NameNodes -->
        <property>
            <name>dfs.namenode.rpc-address.nns.nn1</name>
            <value>node1:9000</value>
        </property>

        <property>
            <name>dfs.namenode.rpc-address.nns.nn2</name>
            <value>node2:9000</value>
        </property>

        <!-- HTTP (web UI) addresses of the NameNodes -->
        <property>
            <name>dfs.namenode.http-address.nns.nn1</name>
            <value>node1:9870</value>
        </property>

        <property>
            <name>dfs.namenode.http-address.nns.nn2</name>
            <value>node2:9870</value>
        </property>

        <!-- Shared edits directory on the JournalNode quorum -->
        <property>
            <name>dfs.namenode.shared.edits.dir</name>
            <value>qjournal://node1:8485;node2:8485;node3:8485/nns</value>
        </property>

        <!-- Proxy provider clients use to locate the active NameNode -->
        <property>
            <name>dfs.client.failover.proxy.provider.nns</name>
            <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
        </property>

        <!-- Fencing method used during failover -->
        <property>
            <name>dfs.ha.fencing.methods</name>
            <value>sshfence</value>
        </property>

        <!-- Enable automatic failover via ZKFC -->
        <property>
            <name>dfs.ha.automatic-failover.enabled</name>
            <value>true</value>
        </property>

        <!-- SSH private key used by sshfence -->
        <property>
            <name>dfs.ha.fencing.ssh.private-key-files</name>
            <value>/root/.ssh/id_rsa</value>
        </property>

        <!-- Local directory where JournalNodes store edits -->
        <property>
            <name>dfs.journalnode.edits.dir</name>
            <value>/export/server/hadoop-3.3.6/hadoopDatas/journalnode</value>
        </property>

        <!-- Local storage directories -->
        <property>
            <name>dfs.namenode.name.dir</name>
            <value>file:///export/server/hadoop-3.3.6/hadoopDatas/namenodeDatas</value>
        </property>

        <property>
            <name>dfs.datanode.data.dir</name>
            <value>file:///export/server/hadoop-3.3.6/hadoopDatas/datanodeDatas</value>
        </property>

        <property>
            <name>dfs.namenode.edits.dir</name>
            <value>file:///export/server/hadoop-3.3.6/hadoopDatas/nn/edits</value>
        </property>

        <property>
            <name>dfs.namenode.checkpoint.edits.dir</name>
            <value>file:///export/server/hadoop-3.3.6/hadoopDatas/dfs/snn/edits</value>
        </property>

        <property>
            <name>dfs.namenode.checkpoint.dir</name>
            <value>file:///export/server/hadoop-3.3.6/hadoopDatas/snn/name</value>
        </property>

        <!-- Replication factor, permission checking, and block size (128 MB) -->
        <property>
            <name>dfs.replication</name>
            <value>3</value>
        </property>

        <property>
            <name>dfs.permissions</name>
            <value>false</value>
        </property>

        <property>
            <name>dfs.blocksize</name>
            <value>134217728</value>
        </property>

        <!-- Include file listing the permitted DataNode hosts (the workers file from step 11) -->
        <property>
            <name>dfs.hosts</name>
            <value>/export/server/hadoop-3.3.6/etc/hadoop/workers</value>
        </property>
    </configuration>
    
  9. yarn-site.xml

    <configuration>

        <!-- Enable log aggregation -->
        <property>
            <name>yarn.log-aggregation-enable</name>
            <value>true</value>
        </property>

        <!-- Enable ResourceManager HA -->
        <property>
            <name>yarn.resourcemanager.ha.enabled</name>
            <value>true</value>
        </property>

        <!-- Cluster id for the RM HA pair -->
        <property>
            <name>yarn.resourcemanager.cluster-id</name>
            <value>mycluster</value>
        </property>

        <!-- Logical ids of the two ResourceManagers -->
        <property>
            <name>yarn.resourcemanager.ha.rm-ids</name>
            <value>rm1,rm2</value>
        </property>

        <!-- Hosts running the ResourceManagers -->
        <property>
            <name>yarn.resourcemanager.hostname.rm1</name>
            <value>node2</value>
        </property>

        <property>
            <name>yarn.resourcemanager.hostname.rm2</name>
            <value>node3</value>
        </property>

        <!-- RM web UI addresses -->
        <property>
            <name>yarn.resourcemanager.webapp.address.rm1</name>
            <value>node2:8088</value>
        </property>

        <property>
            <name>yarn.resourcemanager.webapp.address.rm2</name>
            <value>node3:8088</value>
        </property>

        <!-- RM service addresses for rm1 (node2) -->
        <property>
            <name>yarn.resourcemanager.address.rm1</name>
            <value>node2:8032</value>
        </property>
        <property>
            <name>yarn.resourcemanager.scheduler.address.rm1</name>
            <value>node2:8030</value>
        </property>
        <property>
            <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
            <value>node2:8031</value>
        </property>
        <property>
            <name>yarn.resourcemanager.admin.address.rm1</name>
            <value>node2:8033</value>
        </property>

        <!-- RM service addresses for rm2 (node3) -->
        <property>
            <name>yarn.resourcemanager.address.rm2</name>
            <value>node3:8032</value>
        </property>
        <property>
            <name>yarn.resourcemanager.scheduler.address.rm2</name>
            <value>node3:8030</value>
        </property>
        <property>
            <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
            <value>node3:8031</value>
        </property>
        <property>
            <name>yarn.resourcemanager.admin.address.rm2</name>
            <value>node3:8033</value>
        </property>

        <!-- Recover running applications after an RM restart -->
        <property>
            <name>yarn.resourcemanager.recovery.enabled</name>
            <value>true</value>
        </property>

        <!-- Id of the local RM: set to rm1 on node2 and rm2 on node3 -->
        <property>
            <name>yarn.resourcemanager.ha.id</name>
            <value>rm2</value>
            <description>If we want to launch more than one RM in a single node, we need this configuration</description>
        </property>

        <!-- Store RM state in ZooKeeper -->
        <property>
            <name>yarn.resourcemanager.store.class</name>
            <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
        </property>

        <property>
            <name>yarn.resourcemanager.zk-address</name>
            <value>node2:2181,node3:2181,node1:2181</value>
            <description>For multiple zk services, separate them with comma</description>
        </property>

        <!-- Automatic RM failover -->
        <property>
            <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
            <value>true</value>
            <description>Enable automatic failover; by default, it is enabled only when HA is enabled.</description>
        </property>

        <property>
            <name>yarn.client.failover-proxy-provider</name>
            <value>org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider</value>
        </property>

        <!-- NodeManager resources -->
        <property>
            <name>yarn.nodemanager.resource.cpu-vcores</name>
            <value>4</value>
        </property>

        <property>
            <name>yarn.nodemanager.resource.memory-mb</name>
            <value>4096</value>
        </property>

        <!-- Container allocation limits -->
        <property>
            <name>yarn.scheduler.minimum-allocation-mb</name>
            <value>1024</value>
        </property>

        <property>
            <name>yarn.scheduler.maximum-allocation-mb</name>
            <value>4096</value>
        </property>

        <!-- Log retention: aggregated logs 30 days, local logs 7 days -->
        <property>
            <name>yarn.log-aggregation.retain-seconds</name>
            <value>2592000</value>
        </property>

        <property>
            <name>yarn.nodemanager.log.retain-seconds</name>
            <value>604800</value>
        </property>

        <property>
            <name>yarn.nodemanager.log-aggregation.compression-type</name>
            <value>gz</value>
        </property>

        <!-- NodeManager local directories -->
        <property>
            <name>yarn.nodemanager.local-dirs</name>
            <value>/export/server/hadoop-3.3.6/hadoopDatas/yarn/local</value>
        </property>

        <!-- Maximum number of completed applications the RM keeps -->
        <property>
            <name>yarn.resourcemanager.max-completed-applications</name>
            <value>1000</value>
        </property>

        <!-- Shuffle service for MapReduce -->
        <property>
            <name>yarn.nodemanager.aux-services</name>
            <value>mapreduce_shuffle</value>
        </property>

        <!-- Disable virtual-memory checking -->
        <property>
            <name>yarn.nodemanager.vmem-check-enabled</name>
            <value>false</value>
        </property>

        <!-- Log server URL (JobHistory server on node3) -->
        <property>
            <name>yarn.log.server.url</name>
            <value>http://node3:19888/jobhistory/logs</value>
        </property>

        <property>
            <name>yarn.nodemanager.env-whitelist</name>
            <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
        </property>

    </configuration>
    
    
  10. mapred-site.xml

    <configuration>

        <!-- Run MapReduce on YARN -->
        <property>
            <name>mapreduce.framework.name</name>
            <value>yarn</value>
        </property>

        <!-- JobHistory server addresses -->
        <property>
            <name>mapreduce.jobhistory.address</name>
            <value>node3:10020</value>
        </property>

        <property>
            <name>mapreduce.jobhistory.webapp.address</name>
            <value>node3:19888</value>
        </property>

        <property>
            <name>mapreduce.jobtracker.system.dir</name>
            <value>/export/server/hadoop-3.3.6/hadoopDatas/system/jobtracker</value>
        </property>

        <!-- Map and reduce task memory -->
        <property>
            <name>mapreduce.map.memory.mb</name>
            <value>1024</value>
        </property>

        <property>
            <name>mapreduce.reduce.memory.mb</name>
            <value>1024</value>
        </property>

        <!-- Sort and shuffle tuning -->
        <property>
            <name>mapreduce.task.io.sort.mb</name>
            <value>100</value>
        </property>

        <property>
            <name>mapreduce.task.io.sort.factor</name>
            <value>10</value>
        </property>

        <property>
            <name>mapreduce.reduce.shuffle.parallelcopies</name>
            <value>15</value>
        </property>

        <!-- ApplicationMaster resources -->
        <property>
            <name>yarn.app.mapreduce.am.command-opts</name>
            <value>-Xmx2048m</value>
        </property>

        <property>
            <name>yarn.app.mapreduce.am.resource.mb</name>
            <value>1536</value>
        </property>

        <property>
            <name>mapreduce.cluster.local.dir</name>
            <value>/export/server/hadoop-3.3.6/hadoopDatas/system/local</value>
        </property>
    </configuration>
    
    
  11. workers

    node1
    node2
    node3
    
  12. Configure environment variables (on all three machines)

    vim /etc/profile
    # set hadoop environment
    export HADOOP_HOME=/export/server/hadoop-3.3.6
    export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
    
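    These variables have to be present on node2 and node3 as well. One way (a sketch, assuming it is acceptable to copy /etc/profile wholesale in your environment) is:

    scp /etc/profile node2:/etc/profile
    scp /etc/profile node3:/etc/profile
    # After step 13 has distributed Hadoop, reload and verify on each node
    source /etc/profile
    hadoop version
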
  13. Distribute Hadoop

    cd /export/server
    scp -r hadoop-3.3.6/ node2:$PWD
    scp -r hadoop-3.3.6/ node3:$PWD
    
  14. Start up and initialize

    # 1. Reboot all three machines
    reboot
    # 2. Start ZooKeeper on all three machines
    zkServer.sh start
    zkServer.sh status
    # 3. Format the ZKFC znode in ZooKeeper (run on node1)
    hdfs zkfc -formatZK
    # 4. Start the JournalNodes (run on node1)
    hadoop-daemons.sh start journalnode
    # 5. Format the NameNode (run on node1)
    hdfs namenode -format
    hdfs namenode -initializeSharedEdits -force
    # 6. Start HDFS (run on node1)
    start-dfs.sh
    # 7. Bootstrap node2's NameNode from the active one and start it as standby (run on node2)
    hdfs namenode -bootstrapStandby
    hadoop-daemon.sh start namenode
    # 8. Start YARN on node2 (run on node2)
    start-yarn.sh
    # 9. Start YARN on node3 (run on node3)
    start-yarn.sh
    # 10. Check the ResourceManager state
    yarn rmadmin -getServiceState rm1   # run on node2
    yarn rmadmin -getServiceState rm2   # run on node3
    # 11. Start the JobHistory server (run on node3)
    mr-jobhistory-daemon.sh start historyserver
    
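    As a quick sanity check after these steps, each machine should show the expected daemons in jps; the process names below are the standard Hadoop/ZooKeeper daemon names for this tutorial's layout:

    jps
    # node1: NameNode, DataNode, JournalNode, DFSZKFailoverController, NodeManager, QuorumPeerMain
    # node2: NameNode, DataNode, JournalNode, DFSZKFailoverController, ResourceManager, NodeManager, QuorumPeerMain
    # node3: DataNode, JournalNode, ResourceManager, NodeManager, JobHistoryServer, QuorumPeerMain
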
  15. Web UI addresses

    # HDFS
    http://node1:9870/dfshealth.html#tab-overview
    http://node2:9870/dfshealth.html#tab-overview
    # yarn
    http://node2:8088/cluster
    # jobHistory
    http://node3:19888/jobhistory
    
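    Only the active NameNode serves the full overview page. To see which NameNode (nn1/nn2, as defined in hdfs-site.xml) is currently active:

    hdfs haadmin -getServiceState nn1   # NameNode on node1
    hdfs haadmin -getServiceState nn2   # NameNode on node2
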
  16. From now on, starting and stopping the cluster is much simpler

    # Start ZooKeeper on all three machines first
    zkServer.sh start
    zkServer.sh status
    # Start everything else
    start-all.sh
    # Stop everything
    stop-all.sh
    
