systemctl stop firewalld.service       # stop the firewall
systemctl disable firewalld.service    # disable the firewall permanently
iptables -L                            # check the firewall status
In /etc/sysconfig/selinux, change SELINUX=enforcing
to SELINUX=disabled; a reboot is required for the change to take effect.
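A non-interactive way to make the same change (a sketch; on CentOS /etc/sysconfig/selinux is usually a symlink to /etc/selinux/config, hence the --follow-symlinks flag for GNU sed):
sed -i --follow-symlinks 's/^SELINUX=enforcing$/SELINUX=disabled/' /etc/sysconfig/selinux
grep '^SELINUX=' /etc/sysconfig/selinux    # should now show SELINUX=disabled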
reboot      # takes effect after the VM is rebooted; you can wait and reboot after changing the hostname
sestatus    # check the SELinux status; after the reboot it should show disabled
Edit /etc/sysconfig/network:
NETWORKING=yes
HOSTNAME=master    # set the hostname to master
Edit the /etc/hosts file and append:
192.168.44.201 master
192.168.44.202 slave1
192.168.44.203 slave2
Note: after the change and a reboot, the prompt will show the new hostname. The hostnames of the nodes must never be the same.
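On systemd-based systems (which the systemctl commands above suggest), the hostname can also be set without editing files; a sketch, running the matching command on each node:
hostnamectl set-hostname master    # on the master node
hostnamectl set-hostname slave1    # on slave1
hostnamectl set-hostname slave2    # on slave2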
useradd hadoop
passwd hadoop    # set a password for hadoop, otherwise passwordless SSH trust cannot be set up
Edit /etc/ssh/sshd_config and enable the following sshd options:
RSAAuthentication yes          # add this line manually if it is missing
PubkeyAuthentication yes       # uncomment this line
AuthorizedKeysFile .ssh/authorized_keys
Restart the sshd service:
service sshd restart
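On systemd-based systems the equivalent is:
systemctl restart sshd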
Set up passwordless SSH login for the hadoop user:
su - hadoop
ssh-keygen -t rsa    # generate the public/private key pair; the files appear under /home/hadoop/.ssh/
ssh-copy-id master    # distribute the public key to master
ssh-copy-id slave1    # distribute the public key to slave1
ssh-copy-id slave2    # distribute the public key to slave2
ssh slave1
Test the setup: if you can reach the other two nodes without a password prompt, everything is fine. Test from all three nodes against each other (a loop sketch follows the example session below).
[hadoop@slave1 ~]$ ssh master
Last login: Tue Sep 3 18:28:04 2019 from 192.168.44.203
[hadoop@master ~]$ exit
logout
Connection to master closed.
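To check all three nodes from the hadoop account in one pass, a small loop can be used (a sketch; hostnames as configured in /etc/hosts above):
for h in master slave1 slave2; do
  ssh -o BatchMode=yes "$h" hostname || echo "passwordless login to $h failed"
done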
ps: authorized_keys contains the public keys of all the nodes; every node has its own, distinct key pair.
Link: https://pan.baidu.com/s/1V1GSmxF1g-GZ1U0yD1OTjQ
Extraction code: c3r3
mkdir -p /opt/install
tar -zxvf /opt/software/jdk-8u91-linux-x64.tar.gz -C /opt/install/
Edit /etc/profile and append:
#set jdk
export JAVA_HOME=/opt/install/jdk1.8.0_91
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$PATH:$JAVA_HOME/bin:$JAVA_HOME/jre/bin
source /etc/profile
java -version
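If java -version does not report 1.8.0_91, two quick checks (a sketch):
echo "$JAVA_HOME"    # should print /opt/install/jdk1.8.0_91
which java           # should resolve under $JAVA_HOME (if a system JDK shadows it, check the PATH order)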
scp -r /opt/install/jdk1.8.0_91/ slave1:/opt/install/jdk1.8.0_91/
scp -r /opt/install/jdk1.8.0_91/ slave2:/opt/install/jdk1.8.0_91/
After distribution, repeat steps 3-5 on slave1 and slave2 to verify that the JDK is installed correctly.
cd /opt/software/
wget http://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-2.8.5/hadoop-2.8.5.tar.gz
tar -zxvf hadoop-2.8.5.tar.gz -C /opt/bigdata/
chown -R hadoop:hadoop /opt/bigdata/hadoop-2.8.5/
# set hadoop
export HADOOP_HOME=/opt/bigdata/hadoop-2.8.5
export CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath):$CLASSPATH
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
Then source the environment file so the variables take effect:
source /etc/profile
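A quick sanity check that the Hadoop variables took effect (a sketch):
hadoop version        # should report Hadoop 2.8.5
echo "$HADOOP_HOME"   # should print /opt/bigdata/hadoop-2.8.5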
mkdir -p /srv/bigdata/hdfs/name
mkdir -p /srv/bigdata/hdfs/data       # HDFS data directory
mkdir -p /tmp/bigdata/hadoop/tmp      # temporary directory
mkdir -p /usr/bigdata/hdfs/name/      # NameNode metadata directory (referenced in hdfs-site.xml below)
mkdir -p /usr/bigdata/hdfs/data       # DataNode data directory (referenced in hdfs-site.xml below)
chown -R hadoop:hadoop /srv/bigdata/hdfs/
chown -R hadoop:hadoop /tmp/bigdata/
chown -R hadoop:hadoop /usr/bigdata/hdfs/name/
chown -R hadoop:hadoop /usr/bigdata/hdfs/data/
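The DataNode and temporary paths referenced by the configuration below are needed on the slave nodes as well, so the same directories are usually created there too; a loop that creates and chowns them in one pass (a sketch, run as root on each node):
for d in /srv/bigdata/hdfs/name /srv/bigdata/hdfs/data \
         /tmp/bigdata/hadoop/tmp \
         /usr/bigdata/hdfs/name /usr/bigdata/hdfs/data; do
  mkdir -p "$d" && chown -R hadoop:hadoop "$d"
done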
The following configuration files (under /opt/bigdata/hadoop-2.8.5/etc/hadoop/) need to be edited:
hadoop-env.sh
yarn-env.sh
core-site.xml
hdfs-site.xml
mapred-site.xml
yarn-site.xml
slaves
# hadoop-env.sh
export JAVA_HOME=/opt/install/jdk1.8.0_91
export HADOOP_PREFIX=/opt/bigdata/hadoop-2.8.5
# yarn-env.sh
export JAVA_HOME=/opt/install/jdk1.8.0_91
core-site.xml:
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
<property>
<name>hadoop.home.dir</name>
<value>/opt/bigdata/hadoop-2.8.5</value>
</property>
<property>
<name>hadoop.tmp.dir</name><!-- Hadoop temporary directory -->
<value>/tmp/bigdata/hadoop/tmp</value>
</property>
</configuration>
hdfs-site.xml:
<configuration>
<property>
<name>dfs.replication</name><!-- HDFS replication factor -->
<value>3</value>
</property>
<property>
<name>dfs.namenode.http-address</name><!-- NameNode web access address -->
<value>master:50070</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:50090</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/bigdata/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/bigdata/hdfs/data</value>
</property>
<property>
<name>dfs.hosts</name>
<value>/opt/bigdata/hadoop-2.8.5/etc/hadoop/slaves</value>
</property>
</configuration>
yarn-site.xml:
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8031</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>master:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
</configuration>
# If there is no mapred-site.xml file, copy the template and edit it:
cp mapred-site.xml.template mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
<property>
<name>mapreduce.job.http.address</name>
<value>master:50030</value>
</property>
<property>
<name>mapreduce.task.http.address</name>
<value>master:50060</value>
</property>
</configuration>
slaves:
master    # can be omitted if master should not also act as a DataNode
slave1
slave2
su hadoop
# Note: the /opt/bigdata/ directory must already exist on the slave nodes, owned by the hadoop user
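A sketch of preparing that directory on the slaves from the master (assuming the root password is known; alternatively run the two commands locally on each slave as root):
ssh root@slave1 "mkdir -p /opt/bigdata && chown hadoop:hadoop /opt/bigdata"
ssh root@slave2 "mkdir -p /opt/bigdata && chown hadoop:hadoop /opt/bigdata"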
scp -r /opt/bigdata/hadoop-2.8.5 hadoop@slave1:/opt/bigdata/
scp -r /opt/bigdata/hadoop-2.8.5 hadoop@slave2:/opt/bigdata/
su - hadoop    # must be done as the hadoop user
hdfs namenode -format
ps: if the hdfs command is not recognized, the Hadoop environment variables are not configured correctly.
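A quick way to confirm whether the command is on the PATH (a sketch):
which hdfs || echo "hdfs not found: re-check the exports in /etc/profile, then run source /etc/profile"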
# Option 1:
sh start-all.sh
# Option 2:
sh start-dfs.sh
sh start-yarn.sh
start-dfs.sh starts three processes:
NameNode
DataNode             # not present on the master node if master is not listed in slaves
SecondaryNameNode
start-yarn.sh starts two processes:
ResourceManager
NodeManager
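To confirm the processes on every node, jps (shipped with the JDK installed earlier) can be run over SSH; a sketch using the full path so it does not depend on /etc/profile being sourced in the non-interactive shell:
for h in master slave1 slave2; do
  echo "== $h =="
  ssh "$h" /opt/install/jdk1.8.0_91/bin/jps
done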
To reach the web UIs by hostname from a browser outside the cluster, add the same entries to that machine's hosts file:
192.168.44.201 master
192.168.44.202 slave1
192.168.44.203 slave2
Built-in YARN web UI: http://master:8088/
Built-in HDFS web UI: http://master:50070/