三台Linux虚拟机(这里是centos7.5,我将其分别命名为hadoop001、hadoop002、hadoop003)
jdk1.8.0_151
hadoop-2.6.0-cdh5.7.0
zookeeper-3.4.6
在三台虚拟机上创建hadoop用户:
systemctl stop firewalld.service #关闭防火墙
systemctl disable firewalld.service #禁止防火墙开机自启
vi /etc/hostname #修改hostname的值为hadoop001,若机器是hadoop002,则将值修改为hadoop002
reboot #重启,若不重启可以使用 hostname hadoop001,机器为hadoop002则 hostname hadoop002
useradd hadoop #创建hadoop用户
使用hadoop用户,并执行ssh密钥生成命令(三台设备上都执行该操作)
su hadoop #切换到hadoop用户
ssh-keygen -t rsa
#注意:只点击回车键,不要输入其它。
#上述命令执行成功后会在当前用户主目录下的.ssh文件夹中生成一个名为id_rsa的私钥文件和id_rsa.pub的公钥文件。可执行cd ~/.ssh 后,执行:ls 查看是否生成这两个文件
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys #将本机生成的免密登录的公钥添加到 authorized_keys 文件中,另外两台机器中生成公钥文件的内容同样添加到该文件中
chmod 600 ~/.ssh/* #修改该目录下所有文件权限为600。若不设置为600权限,在非root用户下,使用ssh免密登录可能会出现权限不足的提示
ssh [email protected] #测试是否成功免密登录其它机器。初次连接时,需要输入yes进行确认。
在/etc/hosts文件中将三台机器的ip添加到文件中(三台机器需要)。
将准备好的JDK上传到所有机器上,并在所有设备上做相同的操作
su - #切换到root用户
mkdir /usr/java #创建文件夹
cp -r jdk1.8.0_151 /usr/java/ #将jdk放置到创建的文件夹中
vi ~/.bash_profile #在当前用户的profile文件中配置环境变量:export JAVA_HOME=/usr/java/jdk1.8.0_151 export PATH=$JAVA_HOME/bin:$PATH
. ~/.bash_profile #刷新环境变量
#########################以下是三台设备同样的操作######################################
mkdir ~/app #在当前用户根目录创建app文件夹,将zookeeper放置在该文件夹下
ln -s /home/hadoop/app/zookeeper-3.4.6 /home/hadoop/app/zookeeper #为zookeeper创建一个软链接
vi ~/.bash_profile #配置zookeeper环境变量,在文件中添加 export ZOOKEEPER_HOME=/home/hadoop/app/zookeeper export PATH=$ZOOKEEPER_HOME/bin:$PATH
. ~/.bash_profile #使配置的环境变量重新生效
mkdir ~/app/zookeeper/data #用于存放zk的数据
cd ~/app/zookeeper/conf/ #切换到zk的配置文件目录
cp zoo_sample.cfg zoo.cfg #将zk的配置文件模板拷贝一份,并在上面做配置修改
#dataDir=/home/hadoop/app/zookeeper/data
#server.1=hadoop001:2888:3888
#server.2=hadoop002:2888:3888
#server.3=hadoop003:2888:3888
vi zoo.cfg #将上述4个配置内容添加到文件中,若文件中有上述任意配置,请将该配置修改为上述配置
cd ../data #切换到zk的数据存放区
#################################单独操作############################################
echo 1 > myid #在hadoop001中执行该命令
echo 2 > myid #在hadoop002中执行该命令
echo 3 > myid #在hadoop003中执行该命令
#############################三台设备同样操作######################################
zkServer.sh start #启动zk
zkServer.sh status #查看zk是否成功启动,注意:是在所有zk都启动成功后,执行此命令查看,出现如下信息则启动成功
#############################以下操作在所有机器上保持一致########################################
cp -r hadoop-2.6.0-cdh5.7.0 ~/app/ #将hadoop程序放在当前用户主目录下的app目录下
cd ~/app #切换到app目录
ln -s /home/hadoop/app/hadoop-2.6.0-cdh5.7.0 /home/hadoop/app/hadoop #为hadoop程序创建软链接
vi ~/.bash_profile #添加hadoop环境变量 export HADOOP_HOME=/home/hadoop/app/hadoop export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
source ~/.bash_profile #使添加的环境变量生效
vi ~/app/hadoop/etc/hadoop/hadoop-env.sh #在文件添加java所在路径 export JAVA_HOME=/usr/java/jdk1.8.0_151 若该配置存在,则直接修改
cd ~/app/hadoop/etc/hadoop #切换到hadoop的配置文件目录,修改core-site.xml、hdfs-site.xml、yarn-site.xml、mapred-site.xml、slaves五个配置文件
vi core-site.xml #使用下面core-site.xml文件内容替换
vi yarn-site.xml #使用下面yarn-site.xml文件内容替换
vi mapred-site.xml #使用下面mapred-site.xml文件内容替换
vi hdfs-site.xml #使用下面hdfs-site.xml文件内容替换
vi slaves #使用下面slaves文件内容替换
#将上述配置文件中,指向本地文件夹的路径依次创建出来
mkdir -p /home/hadoop/app/hadoop-2.6.0-cdh5.7.0/tmp
mkdir -p /home/hadoop/app/hadoop-2.6.0-cdh5.7.0/data/dfs/name
mkdir -p /home/hadoop/app/hadoop-2.6.0-cdh5.7.0/data/dfs/data
mkdir -p /home/hadoop/app/hadoop-2.6.0-cdh5.7.0/data/dfs/jn
hadoop-daemon.sh start journalnode #启动journalnode节点
#验证:使用 jps 命令 显示有JournalNode进程则表示启动成功
#############################不同操作################################
#在hadoop001中执行
hdfs namenode -format #初始化namenode,日志中出现 successfully formatted. 则表示初始化成功
scp -r /home/hadoop/app/hadoop/data/dfs/name/current hadoop@hadoop002:/home/hadoop/app/hadoop/data/dfs/name #将hadoop001中namenode初始化的信息上传到hadoop002中,避免hadoop002 namenode 初始化出现问题
hdfs zkfc -formatZK #初始化zkfc
start-dfs.sh #启动hdfs集群 namenode datanode zkfc
#使用 jps 验证 显示有NameNode DataNode DFSZKFailoverController 则表示启动成功
start-yarn.sh #启动yarn集群
#使用 jps 验证 显示有 ResourceManager NodeManager 则表示启动成功
#在hadoop002上执行
yarn-daemon.sh start resourcemanager #启动Standby状态的ResourceManager
#在hadoop001上执行
mr-jobhistory-daemon.sh start historyserver #启动mr的历史服务器
#使用 jps 验证 显示有 JobHistoryServer 则启动成功。
fs.defaultFS
hdfs://ruozeclusterg6
fs.trash.checkpoint.interval
0
fs.trash.interval
1440
hadoop.tmp.dir
/home/hadoop/app/hadoop-2.6.0-cdh5.7.0/tmp
ha.zookeeper.quorum
hadoop001:2181,hadoop002:2181,hadoop003:2181
ha.zookeeper.session-timeout.ms
2000
hadoop.proxyuser.hadoop.hosts
*
hadoop.proxyuser.hadoop.groups
*
io.compression.codecs
org.apache.hadoop.io.compress.GzipCodec,
org.apache.hadoop.io.compress.DefaultCodec,
org.apache.hadoop.io.compress.BZip2Codec,
org.apache.hadoop.io.compress.SnappyCodec
dfs.permissions.superusergroup
hadoop
dfs.webhdfs.enabled
true
dfs.namenode.name.dir
/home/hadoop/app/hadoop-2.6.0-cdh5.7.0/data/dfs/name
namenode 存放name table(fsimage)本地目录(需要修改)
dfs.namenode.edits.dir
${dfs.namenode.name.dir}
namenode存放 transaction file(edits)本地目录(需要修改)
dfs.datanode.data.dir
/home/hadoop/app/hadoop-2.6.0-cdh5.7.0/data/dfs/data
datanode存放block本地目录(需要修改)
dfs.replication
3
dfs.blocksize
268435456
dfs.nameservices
ruozeclusterg6
dfs.ha.namenodes.ruozeclusterg6
nn1,nn2
dfs.namenode.rpc-address.ruozeclusterg6.nn1
hadoop001:8020
dfs.namenode.rpc-address.ruozeclusterg6.nn2
hadoop002:8020
dfs.namenode.http-address.ruozeclusterg6.nn1
hadoop001:50070
dfs.namenode.http-address.ruozeclusterg6.nn2
hadoop002:50070
dfs.journalnode.http-address
0.0.0.0:8480
dfs.journalnode.rpc-address
0.0.0.0:8485
dfs.namenode.shared.edits.dir
qjournal://hadoop001:8485;hadoop002:8485;hadoop003:8485/ruozeclusterg6
dfs.journalnode.edits.dir
/home/hadoop/app/hadoop-2.6.0-cdh5.7.0/data/dfs/jn
dfs.client.failover.proxy.provider.ruozeclusterg6
org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider
dfs.ha.fencing.methods
sshfence
dfs.ha.fencing.ssh.private-key-files
/home/hadoop/.ssh/id_rsa
dfs.ha.fencing.ssh.connect-timeout
30000
dfs.ha.automatic-failover.enabled
true
dfs.hosts
/home/hadoop/app/hadoop-2.6.0-cdh5.7.0/etc/hadoop/slaves
yarn.nodemanager.aux-services
mapreduce_shuffle
yarn.nodemanager.aux-services.mapreduce.shuffle.class
org.apache.hadoop.mapred.ShuffleHandler
yarn.nodemanager.localizer.address
0.0.0.0:23344
Address where the localizer IPC is.
yarn.nodemanager.webapp.address
0.0.0.0:23999
NM Webapp address.
yarn.resourcemanager.connect.retry-interval.ms
2000
yarn.resourcemanager.ha.enabled
true
yarn.resourcemanager.ha.automatic-failover.enabled
true
yarn.resourcemanager.ha.automatic-failover.embedded
true
yarn.resourcemanager.cluster-id
yarn-cluster
yarn.resourcemanager.ha.rm-ids
rm1,rm2
yarn.resourcemanager.scheduler.class
org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler
yarn.resourcemanager.recovery.enabled
true
yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms
5000
yarn.resourcemanager.store.class
org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore
yarn.resourcemanager.zk-address
hadoop001:2181,hadoop002:2181,hadoop003:2181
yarn.resourcemanager.zk.state-store.address
hadoop001:2181,hadoop002:2181,hadoop003:2181
yarn.resourcemanager.address.rm1
hadoop001:23140
yarn.resourcemanager.address.rm2
hadoop002:23140
yarn.resourcemanager.scheduler.address.rm1
hadoop001:23130
yarn.resourcemanager.scheduler.address.rm2
hadoop002:23130
yarn.resourcemanager.admin.address.rm1
hadoop001:23141
yarn.resourcemanager.admin.address.rm2
hadoop002:23141
yarn.resourcemanager.resource-tracker.address.rm1
hadoop001:23125
yarn.resourcemanager.resource-tracker.address.rm2
hadoop002:23125
yarn.resourcemanager.webapp.address.rm1
hadoop001:8088
yarn.resourcemanager.webapp.address.rm2
hadoop002:8088
yarn.resourcemanager.webapp.https.address.rm1
hadoop001:23189
yarn.resourcemanager.webapp.https.address.rm2
hadoop002:23189
yarn.log-aggregation-enable
true
yarn.log.server.url
http://hadoop001:19888/jobhistory/logs
yarn.nodemanager.resource.memory-mb
2048
yarn.scheduler.minimum-allocation-mb
1024
单个任务可申请最少内存,默认1024MB
yarn.scheduler.maximum-allocation-mb
2048
单个任务可申请最大内存,默认8192MB
yarn.nodemanager.resource.cpu-vcores
2
mapreduce.framework.name
yarn
mapreduce.jobhistory.address
hadoop001:10020
mapreduce.jobhistory.webapp.address
hadoop001:19888
mapreduce.map.output.compress
true
mapreduce.map.output.compress.codec
org.apache.hadoop.io.compress.SnappyCodec
hadoop001
hadoop002
hadoop003