一、下载Hadoop包和JDK
1、下载Hadoop地址:https://archive.apache.org/dist/hadoop/common/hadoop-2.6.4/hadoop-2.6.4.tar.gz
2、下载jdk:链接:https://pan.baidu.com/s/1lbu7eBEtgjeGIi2bWthLnA 提取码:0j0j
二、准备虚拟机
1、在VMware中创建新的虚拟机(CentOS 7),此步骤省略。
2、实验虚拟机规划:
主机名 | 角色 | Ip地址 | 网络类型 | 内存大小 | 磁盘大小 |
---|---|---|---|---|---|
node1 | master | 192.168.30.11 | NAT模式 | 2G | >=20G |
node2 | slave | 192.168.30.12 | NAT模式 | 1G | >=20G |
node3 | slave | 192.168.30.13 | NAT模式 | 1G | >=20G |
node4 | slave | 192.168.30.14 | NAT模式 | 1G | >=20G |
三、虚拟机基础环境配置
1、修改主机名
[root@template ~]# hostnamectl set-hostname node1
2、配置网卡
- 修改配置文件
[root@template ~]# cd /etc/sysconfig/network-scripts/
[root@template network-scripts]# cp ifcfg-ens33 /opt/
[root@template network-scripts]# vi ifcfg-ens33
#文件修改部分
BOOTPROTO="dhcp" ——> BOOTPROTO="static"
#删除如下内容
UUID="821333c6-4f82-4aa5-b74c-f09a5c2da69f"
#添加如下内容
IPADDR="192.168.30.11"
NETMASK="255.255.255.0"
GATEWAY="192.168.30.2"
DNS1="192.168.30.2"
- 重启网络
[root@template network-scripts]# systemctl restart network
- 查看网络
```
[root@node1 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host
       valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 00:0c:29:a7:c3:75 brd ff:ff:ff:ff:ff:ff
    inet 192.168.30.11/24 brd 192.168.30.255 scope global noprefixroute ens33
       valid_lft forever preferred_lft forever
    inet6 fe80::705e:d2d:93d5:dcf8/64 scope link noprefixroute
       valid_lft forever preferred_lft forever
```
3、配置host文件
- 修改配置文件
[root@node1 ~]# vi /etc/hosts
#添加如下内容
192.168.30.11 node1
192.168.30.12 node2
192.168.30.13 node3
192.168.30.14 node4
- 测试
[root@node1 ~]# ping node1
PING node1 (192.168.30.11) 56(84) bytes of data.
64 bytes from node1 (192.168.30.11): icmp_seq=1 ttl=64 time=0.013 ms
64 bytes from node1 (192.168.30.11): icmp_seq=2 ttl=64 time=0.020 ms
4、关闭防火墙
[root@node1 ~]# systemctl stop firewalld
[root@node1 ~]# systemctl disable firewalld
Removed symlink /etc/systemd/system/multi-user.target.wants/firewalld.service.
Removed symlink /etc/systemd/system/dbus-org.fedoraproject.FirewallD1.service.
5、禁用SELinux
[root@node1 ~]# setenforce 0
[root@node1 ~]# vi /etc/selinux/config
#第7行修改为如下:
SELINUX=disabled
6、时间同步服务
- 时区修改
[root@node1 ~]# timedatectl set-timezone Asia/Shanghai
- 修改配置文件
[root@node1 ~]# vi /etc/chrony.conf
#注释3-6行
#添加如下一行
server ntp.aliyun.com iburst
#修改26行为如下内容
allow 192.168.30.0/24
- 重启服务
[root@node1 ~]# systemctl restart chronyd
- 查看时间同步状态
[root@node1 ~]# chronyc sources
210 Number of sources = 1
MS Name/IP address         Stratum Poll Reach LastRx Last sample
===============================================================================
^* 203.107.6.88                  2   6    17     6   +814us[+4247us] +/-   27ms
7、安装JDK
- 上传JDK至node1主机上
- 解压JDK包
[root@node1 ~]# tar -zxvf jdk-8u181-linux-x64.tar.gz -C /opt/
- 重命名jdk目录
[root@node1 ~]# mv /opt/jdk1.8.0_181/ /opt/jdk
- 配置jdk环境变量
[root@node1 ~]# vi /etc/profile.d/hadoop.sh
#添加如下内容
export JAVA_HOME=/opt/jdk
export PATH=$PATH:$JAVA_HOME/bin
- 使环境变量生效
[root@node1 ~]# source /etc/profile
- 验证
[root@node1 ~]# java -version
java version "1.8.0_181"
Java(TM) SE Runtime Environment (build 1.8.0_181-b13)
Java HotSpot(TM) 64-Bit Server VM (build 25.181-b13, mixed mode)
8、安装Hadoop
- 上传hadoop包至node1主机中
- 解压hadoop包
[root@node1 ~]# tar -zxvf hadoop-2.6.4.tar.gz -C /opt/
[root@node1 ~]# mv /opt/hadoop-2.6.4/ /opt/hadoop
- 添加环境变量
[root@node1 ~]# vi /etc/profile.d/hadoop.sh
export JAVA_HOME=/opt/jdk
export HADOOP_HOME=/opt/hadoop
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
- 使环境变量生效
[root@node1 ~]# source /etc/profile
- 验证
[root@node1 ~]# hadoop
Usage: hadoop [--config confdir] COMMAND
where COMMAND is one of:
fs run a generic filesystem user client
version print the version
jar run a jar file
checknative [-a|-h] check native hadoop and compression libraries availability
distcp copy file or directories recursively
archive -archiveName NAME -p * create a hadoop archive
classpath prints the class path needed to get the
credential interact with credential providers
Hadoop jar and the required libraries
daemonlog get/set the log level for each daemon
s3guard manage data on S3
trace view and modify Hadoop tracing settings
or
CLASSNAME run the class named CLASSNAME
9、hadoop配置文件
- 修改core-site.xml
[root@node1 ~]# vi /opt/hadoop/etc/hadoop/core-site.xml #修改为如下:
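```xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://node1:8020</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/hadoop-full/</value>
  </property>
</configuration>
```
- 修改hdfs-site.xml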
[root@node1 hadoop]# vi /opt/hadoop/etc/hadoop/hdfs-site.xml #修改内容为如下:
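```xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>node1:50090</value>
  </property>
</configuration>
```
- 修改mapred-site.xml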
[root@node1 ~]# cd /opt/hadoop/etc/hadoop/
[root@node1 hadoop]# cp mapred-site.xml.template mapred-site.xml
[root@node1 hadoop]# vi mapred-site.xml
#修改为如下代码
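```xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>node1:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>node1:19888</value>
  </property>
</configuration>
```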
- 修改slaves
[root@node1 hadoop]# vi /opt/hadoop/etc/hadoop/slaves
#修改为如下内容(每行一个主机名):
node2
node3
node4
- 修改 yarn-site.xml
[root@node1 hadoop]# vi yarn-site.xml #修改为如下内容
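```xml
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>node1</value>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
</configuration>
```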
10、克隆虚拟机
- node1主机关机并克隆虚拟机
- 修改node2主机配置
1)修改网络
[root@node1 ~]# vi /etc/sysconfig/network-scripts/ifcfg-ens33
#修改内容为:
IPADDR="192.168.30.12"
2)修改主机名
[root@node1 ~]# hostnamectl set-hostname node2
3)时间同步服务
[root@node1 ~]# vi /etc/chrony.conf
#第7行修改为如下:
server node1 iburst
#第26行注释
# allow 192.168.30.0/24
4)重启虚拟机
[root@node1 ~]# reboot
- 修改node3主机配置 :配置步骤参考node2即可
- 修改node4主机配置:配置步骤参考node2即可
11、配置ssh
- 登录node1主机
- 生成ssh-key
[root@node1 ~]# ssh-keygen
#一直回车到看到如下效果
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
SHA256:9F0zNBhe7RGjjLZuZcfjLCDzkgYemNtEZLOIsSeVTH4 root@node1
The key's randomart image is:
+---[RSA 2048]----+
| .oo.+ .o++.|
| *o+ o .+o.oo|
| + + E. o.o+..|
| o =. ......o.|
| o +So.o.o + |
| = o * + + .|
| . o + + . o |
| . o . |
| |
+----[SHA256]-----+
- 进入ssh生成目录
[root@node1 ~]# cd ~/.ssh/
- 发送公钥至node1、node2、node3、node4主机上
[root@node1 .ssh]# ssh-copy-id root@node1
#输入yes
#输入目标主机密码
123456
Now try logging into the machine, with: "ssh 'root@node1'"
and check to make sure that only the key(s) you wanted were added.
[root@node1 .ssh]# ssh-copy-id root@node2
[root@node1 .ssh]# ssh-copy-id root@node3
[root@node1 .ssh]# ssh-copy-id root@node4
12、启动HDFS
- 格式化hdfs文件系统
[root@node1 ~]# hdfs namenode -format
#表示成功格式化
20/01/17 11:06:20 INFO common.Storage: Storage directory /hadoop-full/dfs/name has been successfully formatted.
- 启动Hadoop
#启动所有服务
[root@node1 ~]# start-all.sh
- 查看进程
#通过jps检查启动的进程
[root@node1 sbin]# jps
1232 NameNode
1920 Jps
1404 SecondaryNameNode
- webui方式
验证HDFS启动的网址:http://node1:50070/
Yarn地址:http://node1:8088/cluster