yum -y update
A reboot is recommended after the update.
yum -y install gcc gcc-c++ autoconf automake cmake make rsync vim man zip unzip net-tools zlib zlib-devel openssl openssl-devel pcre-devel tcpdump lrzsz tar wget
hostnamectl set-hostname spark01   # use spark02 / spark03 on the other two nodes
or
vim /etc/hostname
spark01
reboot
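After the reboot, a quick check that the new hostname took effect:
hostnamectl status
hostname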
vim /etc/sysconfig/network-scripts/ifcfg-ens160
Example NIC configuration file (ifcfg-ens160):
TYPE=Ethernet
PROXY_METHOD=none
BROWSER_ONLY=no
BOOTPROTO=none
DEFROUTE=yes
IPV4_FAILURE_FATAL=no
IPV6INIT=yes
IPV6_AUTOCONF=yes
IPV6_DEFROUTE=yes
IPV6_FAILURE_FATAL=no
NAME=ens160
UUID=943779e9-249c-44bb-b272-d49ea5831ed4
DEVICE=ens160
ONBOOT=yes
IPADDR=192.168.28.11
PREFIX=24
GATEWAY=192.168.28.2
DNS1=192.168.28.2
After saving, re-activate the connection to apply the new settings:
nmcli con up ens160
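To verify that the static address and gateway above are active (values as configured in ifcfg-ens160):
ip addr show ens160
ping -c 3 192.168.28.2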
systemctl stop firewalld
systemctl disable firewalld
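A quick check that the firewall is stopped and will not start on boot:
systemctl is-active firewalld
systemctl is-enabled firewalld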
vim /etc/selinux/config
# This file controls the state of SELinux on the system.
# SELINUX= can take one of these three values:
# enforcing - SELinux security policy is enforced.
# permissive - SELinux prints warnings instead of enforcing.
# disabled - No SELinux policy is loaded.
# SELINUX=enforcing
# SELINUXTYPE= can take one of three values:
# targeted - Targeted processes are protected,
# minimum - Modification of targeted policy. Only selected processes are protected.
# mls - Multi Level Security protection.
# SELINUXTYPE=targeted
SELINUX=disabled
Then turn off enforcement for the current session (the config change takes full effect after a reboot):
setenforce 0
Or, equivalently, via sed:
sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
setenforce 0
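To confirm the change (getenforce reports Permissive now, and Disabled after the next reboot):
getenforce
sestatus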
mkdir -p /opt/soft
cd /opt/soft
# Upload the JDK, Hadoop, and ZooKeeper archives here
tar -zxvf jdk-8u361-linux-x64.tar.gz
mv jdk1.8.0_361 jdk8
tar -zxvf hadoop-3.3.5.tar.gz
mv hadoop-3.3.5 hadoop3
vim /etc/profile
export JAVA_HOME=/opt/soft/jdk8
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$PATH:$JAVA_HOME/bin
export HADOOP_HOME=/opt/soft/hadoop3
export HADOOP_INSTALL=${HADOOP_HOME}
export HADOOP_MAPRED_HOME=${HADOOP_HOME}
export HADOOP_COMMON_HOME=${HADOOP_HOME}
export HADOOP_HDFS_HOME=${HADOOP_HOME}
export YARN_HOME=${HADOOP_HOME}
export PATH=${PATH}:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
After editing, run source so that /etc/profile takes effect:
source /etc/profile
Check the environment variables:
printenv
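A quick sanity check that the JDK and Hadoop are both on the PATH:
java -version
hadoop version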
vim /etc/hosts
192.168.28.11 spark01
192.168.28.12 spark02
192.168.28.13 spark03
A reboot is recommended after this change.
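To confirm the new names resolve (assuming all three machines are already up at these addresses):
for h in spark01 spark02 spark03; do ping -c 1 $h; done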
Edit the Hadoop configuration files, located under etc/hadoop in the extracted Hadoop directory:
cd /opt/soft/hadoop3
Modify the following configuration files (their contents are shown below, in the same order):
- hadoop-env.sh
- core-site.xml
- hdfs-site.xml
- workers
- mapred-site.xml
- yarn-site.xml
hadoop-env.sh:
export JAVA_HOME=/opt/soft/jdk8
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_ZKFC_USER=root
export HDFS_JOURNALNODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
core-site.xml:
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://puegg</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/home/hadoop_data</value>
    </property>
    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>root</value>
    </property>
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>spark01:2181,spark02:2181,spark03:2181</value>
    </property>
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>*</value>
    </property>
</configuration>
hdfs-site.xml:
<configuration>
    <property>
        <name>dfs.nameservices</name>
        <value>puegg</value>
    </property>
    <property>
        <name>dfs.ha.namenodes.puegg</name>
        <value>nn1,nn2</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.puegg.nn1</name>
        <value>spark01:8020</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.puegg.nn2</name>
        <value>spark02:8020</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.puegg.nn1</name>
        <value>spark01:9870</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.puegg.nn2</name>
        <value>spark02:9870</value>
    </property>
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://spark01:8485;spark02:8485;spark03:8485/puegg</value>
    </property>
    <property>
        <name>dfs.client.failover.proxy.provider.puegg</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/root/.ssh/id_rsa</value>
    </property>
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/opt/journalnode/data</value>
    </property>
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.safemode.threshold.pct</name>
        <value>1</value>
        <description>
            Specifies the percentage of blocks that should satisfy
            the minimal replication requirement defined by dfs.replication.min.
            Values less than or equal to 0 mean not to wait for any particular
            percentage of blocks before exiting safemode.
            Values greater than 1 will make safe mode permanent.
        </description>
    </property>
</configuration>
workers:
spark01
spark02
spark03
mapred-site.xml:
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.application.classpath</name>
        <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
    </property>
</configuration>
yarn-site.xml:
<configuration>
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>cluster1</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>spark01</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>spark02</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>spark01:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>spark02:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>spark01:2181,spark02:2181,spark03:2181</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.nodemanager.env-whitelist</name>
        <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
    </property>
</configuration>
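Before distributing the files, a quick well-formedness check of the edited XML; this sketch uses xmllint, which ships with the libxml2 package and may need to be installed separately:
cd /opt/soft/hadoop3/etc/hadoop
for f in core-site.xml hdfs-site.xml mapred-site.xml yarn-site.xml; do
  xmllint --noout $f && echo "$f OK"
done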
Generate a local key pair and append the public key to the authorized_keys file:
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
# Or copy the public key to every node (including this one):
ssh-copy-id spark01
ssh-copy-id spark02
ssh-copy-id spark03
scp -rv ~/.ssh root@spark02:~/
scp -rv ~/.ssh root@spark03:~/
# SSH to this host itself to accept the host key
ssh spark01
# When prompted "Are you sure you want to continue connecting (yes/no)?", type yes
# After logging in successfully, run exit (or logout) to return
exit
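Once the keys are distributed, passwordless login to every node can be verified in one pass:
for h in spark01 spark02 spark03; do ssh $h hostname; done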
scp -v /etc/profile root@spark02:/etc
scp -v /etc/profile root@spark03:/etc
scp -rv /opt/soft/hadoop3/etc/hadoop/* root@spark02:/opt/soft/hadoop3/etc/hadoop/
scp -rv /opt/soft/hadoop3/etc/hadoop/* root@spark03:/opt/soft/hadoop3/etc/hadoop/
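A quick way to confirm the configuration arrived intact (assumes Hadoop was already unpacked under /opt/soft/hadoop3 on spark02 and spark03 before the scp):
md5sum /opt/soft/hadoop3/etc/hadoop/core-site.xml
ssh spark02 md5sum /opt/soft/hadoop3/etc/hadoop/core-site.xml
ssh spark03 md5sum /opt/soft/hadoop3/etc/hadoop/core-site.xml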
# On each of the three nodes: reload the profile and create the data directory
source /etc/profile
mkdir -p /home/hadoop_data
1. Start ZooKeeper on all three nodes: zkServer.sh start
2. Start the JournalNode on all three nodes: hadoop-daemon.sh start journalnode (or, in Hadoop 3 style, hdfs --daemon start journalnode)
3. Format HDFS on one of the NameNodes: hdfs namenode -format
4. Copy the newly formatted metadata to the other NameNode:
   a) Start the NameNode that was just formatted: hadoop-daemon.sh start namenode
   b) On the NameNode that was NOT formatted, run: hdfs namenode -bootstrapStandby
   c) Start the second NameNode: hadoop-daemon.sh start namenode
5. Initialize the failover state in ZooKeeper on one of the NameNodes: hdfs zkfc -formatZK
6. Stop the daemons started above: stop-dfs.sh
7. Start HDFS in full: start-dfs.sh
8. Start the ResourceManager: yarn-daemon.sh start resourcemanager (or, in Hadoop 3 style, yarn --daemon start resourcemanager)
# (reference) prebuilt 64-bit Hadoop native libraries for older releases:
http://dl.bintray.com/sequenceiq/sequenceiq-bin/hadoop-native-64-2.5.0.tar
9. Safe mode (enter or leave manually if needed):
hdfs dfsadmin -safemode enter
hdfs dfsadmin -safemode leave
10. List which nodes are NameNodes and check their state:
hdfs getconf -namenodes
hdfs haadmin -getServiceState nn1    # use the NameNode ID (nn1 / nn2), not the hostname
11. Force a state transition:
hdfs haadmin -transitionToActive --forcemanual nn1
# Before shutting down, stop the services in order
stop-yarn.sh
stop-dfs.sh
# After booting, start the services in order
start-dfs.sh
start-yarn.sh
or
# Stop all services before shutdown
stop-all.sh
# Start all services after boot
start-all.sh
# Use jps to confirm all processes are healthy after each start/stop before doing anything else
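For reference, after a full start jps on spark01 should show roughly the following daemons (PIDs will differ; spark03 runs no NameNode, ZKFC, or ResourceManager in this layout):
jps
# QuorumPeerMain           (ZooKeeper)
# JournalNode
# NameNode
# DFSZKFailoverController
# DataNode
# ResourceManager
# NodeManager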