Install the Java environment
Install the Zookeeper environment
Compile the Hadoop source code
Install Hadoop
cd /export/software
tar -zxvf hadoop-3.3.6.tar.gz -C ../server/
cd ../server/hadoop-3.3.6/
Configure hadoop-env.sh
export JAVA_HOME=/export/server/jdk1.8.0_241
export HADOOP_PID_DIR=/export/server/hadoop-3.3.6/hadoop_pid_dir_tmp
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export HDFS_JOURNALNODE_USER=root
export HDFS_ZKFC_USER=root
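Optionally, create the PID directory referenced above and confirm that JAVA_HOME points at a working JDK (both paths are taken from the exports):
mkdir -p /export/server/hadoop-3.3.6/hadoop_pid_dir_tmp
/export/server/jdk1.8.0_241/bin/java -version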
Check Hadoop's native-library dependencies in the new environment
[root@node00 bin]# ./hadoop checknative
2023-08-29 04:52:39,162 INFO bzip2.Bzip2Factory: Successfully loaded & initialized native-bzip2 library system-native
2023-08-29 04:52:39,164 INFO zlib.ZlibFactory: Successfully loaded & initialized native-zlib library
2023-08-29 04:52:39,198 INFO nativeio.NativeIO: The native code was built with PMDK support, and PMDK libs were loaded successfully.
Native library checking:
hadoop: true /export/server/hadoop-3.3.6-src/hadoop-dist/target/hadoop-3.3.6/lib/native/libhadoop.so.1.0.0
zlib: true /lib64/libz.so.1
zstd : true /lib64/libzstd.so.1
bzip2: true /lib64/libbz2.so.1
openssl: true /lib64/libcrypto.so
ISA-L: true /lib/libisal.so.2
PMDK: true /usr/local/lib64/libpmem.so.1.0.0
# If any entry shows false, install the corresponding library on all three machines first
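One option on CentOS/RHEL-style systems is to install the common codec libraries from the package manager; the package names below are assumptions for CentOS 7, and ISA-L and PMDK typically have to be built from source:
# Run on all three machines; adjust package names for your distribution
yum install -y zlib bzip2 openssl snappy libzstd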
Configure core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://nns</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/export/server/hadoop-3.3.6/hadoopDatas/tempDatas</value>
  </property>
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>node1:2181,node2:2181,node3:2181</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>4096</value>
  </property>
  <property>
    <name>fs.trash.interval</name>
    <value>10080</value>
  </property>
  <property>
    <name>hadoop.http.staticuser.user</name>
    <value>root</value>
  </property>
  <property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.root.users</name>
    <value>*</value>
  </property>
</configuration>
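Before moving on, the file can be sanity-checked locally; hdfs getconf only reads the local configuration, so it works before the cluster is up (xmllint is optional and may need to be installed):
xmllint --noout /export/server/hadoop-3.3.6/etc/hadoop/core-site.xml    # no output means the XML is well formed
/export/server/hadoop-3.3.6/bin/hdfs getconf -confKey fs.defaultFS      # should print hdfs://nns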
Configure hdfs-site.xml
<configuration>
  <property>
    <name>dfs.nameservices</name>
    <value>nns</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.nns</name>
    <value>nn1,nn2</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.nns.nn1</name>
    <value>node1:9000</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.nns.nn2</name>
    <value>node2:9000</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.nns.nn1</name>
    <value>node1:9870</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.nns.nn2</name>
    <value>node2:9870</value>
  </property>
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://node1:8485;node2:8485;node3:8485/nns</value>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.nns</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/root/.ssh/id_rsa</value>
  </property>
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/export/server/hadoop-3.3.6/hadoopDatas/journalnode</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///export/server/hadoop-3.3.6/hadoopDatas/namenodeDatas</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///export/server/hadoop-3.3.6/hadoopDatas/datanodeDatas</value>
  </property>
  <property>
    <name>dfs.namenode.edits.dir</name>
    <value>file:///export/server/hadoop-3.3.6/hadoopDatas/nn/edits</value>
  </property>
  <property>
    <name>dfs.namenode.checkpoint.edits.dir</name>
    <value>file:///export/server/hadoop-3.3.6/hadoopDatas/dfs/snn/edits</value>
  </property>
  <property>
    <name>dfs.namenode.checkpoint.dir</name>
    <value>file:///export/server/hadoop-3.3.6/hadoopDatas/snn/name</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.blocksize</name>
    <value>134217728</value>
  </property>
  <property>
    <name>dfs.hosts</name>
    <value>/export/server/hadoop-3.3.6/etc/hadoop/slaves</value>
  </property>
</configuration>
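As with core-site.xml, the NameNode list resolved from this file can be checked locally before anything is started:
/export/server/hadoop-3.3.6/bin/hdfs getconf -namenodes                  # should print node1 node2
/export/server/hadoop-3.3.6/bin/hdfs getconf -confKey dfs.nameservices   # should print nns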
Configure yarn-site.xml
<configuration>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>mycluster</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>node2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>node3</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>node2:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>node3:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm1</name>
    <value>node2:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm1</name>
    <value>node2:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
    <value>node2:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm1</name>
    <value>node2:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm2</name>
    <value>node3:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm2</name>
    <value>node3:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
    <value>node3:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm2</name>
    <value>node3:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.id</name>
    <value>rm2</value>
    <description>If we want to launch more than one RM in single node, we need this configuration</description>
  </property>
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>node2:2181,node3:2181,node1:2181</value>
    <description>For multiple zk services, separate them with comma</description>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
    <value>true</value>
    <description>Enable automatic failover; By default, it is enabled only when HA is enabled.</description>
  </property>
  <property>
    <name>yarn.client.failover-proxy-provider</name>
    <value>org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>4</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>4096</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>4096</value>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>2592000</value>
  </property>
  <property>
    <name>yarn.nodemanager.log.retain-seconds</name>
    <value>604800</value>
  </property>
  <property>
    <name>yarn.nodemanager.log-aggregation.compression-type</name>
    <value>gz</value>
  </property>
  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>/export/server/hadoop-3.3.6/hadoopDatas/yarn/local</value>
  </property>
  <property>
    <name>yarn.resourcemanager.max-completed-applications</name>
    <value>1000</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.log.server.url</name>
    <value>http://node3:19888/jobhistory/logs</value>
  </property>
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
  </property>
</configuration>
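One caveat: yarn.resourcemanager.ha.id is set to rm2 above, but rm1 is mapped to node2 and rm2 to node3. Each ResourceManager host has to claim the id that matches its own hostname mapping, so after this file is distributed, the copy on node2 should use rm1 (node3 keeps rm2). A minimal sketch, assuming <value>rm2</value> appears only in that one property:
# Run on node2 only, after the configuration has been distributed
sed -i 's#<value>rm2</value>#<value>rm1</value>#' /export/server/hadoop-3.3.6/etc/hadoop/yarn-site.xml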
Configure mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>node3:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>node3:19888</value>
  </property>
  <property>
    <name>mapreduce.jobtracker.system.dir</name>
    <value>/export/server/hadoop-3.3.6/hadoopDatas/system/jobtracker</value>
  </property>
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>mapreduce.task.io.sort.mb</name>
    <value>100</value>
  </property>
  <property>
    <name>mapreduce.task.io.sort.factor</name>
    <value>10</value>
  </property>
  <property>
    <name>mapreduce.reduce.shuffle.parallelcopies</name>
    <value>15</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.command-opts</name>
    <value>-Xmx2048m</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.resource.mb</name>
    <value>1536</value>
  </property>
  <property>
    <name>mapreduce.cluster.local.dir</name>
    <value>/export/server/hadoop-3.3.6/hadoopDatas/system/local</value>
  </property>
</configuration>
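Two caveats for this file. On Hadoop 3.x, MapReduce jobs often fail with "Could not find or load main class org.apache.hadoop.mapreduce.v2.app.MRAppMaster" unless HADOOP_MAPRED_HOME is passed to the AM and tasks; the properties below are a commonly used fix, shown as a sketch that simply points at this installation directory. Also note that yarn.app.mapreduce.am.command-opts requests a 2048m heap while yarn.app.mapreduce.am.resource.mb only grants a 1536 MB container, so the AM may be killed for exceeding its memory limit; consider lowering the heap or raising the container size.
<property>
  <name>yarn.app.mapreduce.am.env</name>
  <value>HADOOP_MAPRED_HOME=/export/server/hadoop-3.3.6</value>
</property>
<property>
  <name>mapreduce.map.env</name>
  <value>HADOOP_MAPRED_HOME=/export/server/hadoop-3.3.6</value>
</property>
<property>
  <name>mapreduce.reduce.env</name>
  <value>HADOOP_MAPRED_HOME=/export/server/hadoop-3.3.6</value>
</property>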
Configure workers
node1
node2
node3
Configure environment variables (on all three machines)
vim /etc/profile
# set hadoop environment
export HADOOP_HOME=/export/server/hadoop-3.3.6
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
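After editing /etc/profile, reload it and confirm the hadoop command resolves:
source /etc/profile
hadoop version    # should report Hadoop 3.3.6
which hadoop      # should point into /export/server/hadoop-3.3.6/bin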
Distribute Hadoop
cd /export/server
scp -r hadoop-3.3.6/ node2:$PWD
scp -r hadoop-3.3.6/ node3:$PWD
Startup and initialization
# 1. Reboot all three machines
reboot
# 2. Start ZK on all three machines
zkServer.sh start
zkServer.sh status
# 3. Initialize the ZK failover state (run on node1)
hdfs zkfc -formatZK
# 4. Start the JournalNodes (run on node1)
hadoop-daemons.sh start journalnode
# 5. Format HDFS (run on node1)
hdfs namenode -format
hdfs namenode -initializeSharedEdits -force
# 6. Start HDFS (run on node1)
start-dfs.sh
# 7. Sync metadata to node2's NameNode and start it in standby state (run on node2)
hdfs namenode -bootstrapStandby
hadoop-daemon.sh start namenode
# 8. Start YARN (run on node2)
start-yarn.sh
# 9. Start YARN (run on node3)
start-yarn.sh
# 10. Check the ResourceManager states
yarn rmadmin -getServiceState rm1    # run on node2
yarn rmadmin -getServiceState rm2    # run on node3
# 11. Start the JobHistory server (run on node3)
mr-jobhistory-daemon.sh start historyserver
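Once everything is up, jps on each node should show roughly the daemons below. The exact layout is an assumption based on the steps above; under RM HA, start-yarn.sh brings up both ResourceManagers and all NodeManagers.
# node1: NameNode, DFSZKFailoverController, DataNode, JournalNode, NodeManager, QuorumPeerMain
# node2: NameNode, DFSZKFailoverController, DataNode, JournalNode, NodeManager, ResourceManager, QuorumPeerMain
# node3: DataNode, JournalNode, NodeManager, ResourceManager, JobHistoryServer, QuorumPeerMain
jps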
Web UI addresses
# HDFS
http://node1:9870/dfshealth.html#tab-overview
http://node2:9870/dfshealth.html#tab-overview
# YARN
http://node2:8088/cluster
# JobHistory
http://node3:19888/jobhistory
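The HA state of both NameNodes can also be checked from the command line, which is handy when the web UIs are not reachable:
# One should report active, the other standby
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2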
Routine start and stop of the cluster is much simpler from now on.
# Start ZK on all three machines first
zkServer.sh start
zkServer.sh status
# Start everything
start-all.sh
# Stop everything
stop-all.sh