Hadoop Environment Installation

1. Minimal installation

2. Configure a static IP address

[root@master ~]# vim /etc/sysconfig/network-scripts/ifcfg-eno16777736 
BOOTPROTO="static"
ONBOOT="yes"

IPADDR=192.168.10.4
GATEWAY=192.168.10.2
NETMASK=255.255.255.0
DNS1=8.8.8.8
DNS2=8.8.4.4
service network restart
ping www.baidu.com

3. Configure the hostname

hostnamectl set-hostname master
bash 
hostname

4. Install yum repositories

Install the necessary packages:

[root@hadoop100 ~]# yum install -y epel-release
[root@hadoop100 ~]# yum install -y psmisc nc net-tools rsync vim lrzsz ntp libzstd openssl-static tree iotop git

5. Disable the firewall

[root@hadoop100 ~]# systemctl stop firewalld
[root@hadoop100 ~]# systemctl disable firewalld

6. Create a user and set its password

useradd nby993
passwd nby993

7. Grant the user root privileges

This makes it convenient to run root-level commands with sudo later.

vim /etc/sudoers

## Allow root to run any commands anywhere
root    ALL=(ALL)     ALL
nby993   ALL=(ALL)     NOPASSWD:ALL

8. Configure the hosts mapping file

Linux
vim /etc/hosts
192.168.12.4 master
192.168.12.5 slave1
192.168.12.6 slave2

Windows
C:\Windows\System32\drivers\etc
192.168.12.4 master
192.168.12.5 slave1
192.168.12.6 slave2

9. Go to /opt and extract the Hadoop and JDK archives

Move them to /usr/local/src/.
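A sketch of the extract-and-move commands, assuming the archives were uploaded to /opt/software (the directory referenced later by my_rsync.sh) and the JDK archive name shown here, which is an assumption:

cd /opt/software
tar -zxvf hadoop-3.1.3.tar.gz
tar -zxvf jdk-8u212-linux-x64.tar.gz    # archive name is an assumption
mv hadoop-3.1.3 jdk1.8.0_212 /usr/local/src/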

10. Configure environment variables

# Configure the home directories
JAVA_HOME=/usr/local/src/jdk1.8.0_212
HADOOP_HOME=/usr/local/src/hadoop-3.1.3

# Declare the PATH variable
PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

# Export as global variables
export PATH JAVA_HOME HADOOP_HOME
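These lines typically go into a profile script that is then sourced; a minimal sketch, assuming the file name /etc/profile.d/my_env.sh (the name is an assumption):

sudo vim /etc/profile.d/my_env.sh    # paste the lines above
source /etc/profile
java -version       # verify the JDK is on the PATH
hadoop version      # verify Hadoop is on the PATH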

11. Clone master to create slave1 and slave2

Configure each slave's hostname (use slave2 on the second clone):
hostnamectl set-hostname slave1

In the VM's network adapter settings, regenerate the MAC address, then add it to the NIC configuration file and assign a new IP:
HWADDR=00:50:56:35:18:28   // MAC address
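A sketch of the edit on the slave1 clone, assuming the 192.168.12.x addresses from /etc/hosts above (slave2 gets .6 and its own regenerated MAC):

vim /etc/sysconfig/network-scripts/ifcfg-eno16777736
HWADDR=00:50:56:35:18:28   // the regenerated MAC of this clone
IPADDR=192.168.12.5        // 192.168.12.6 on slave2
service network restart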

Disable NetworkManager
systemctl stop NetworkManager
systemctl disable NetworkManager

Reboot the slave
init 6 

12. Configure a distribution script

#!/bin/bash

# Argument check
if [ $# -lt 1 ]
then
  echo "Arguments must not be empty"
  exit
fi

# Iterate over the cluster hosts and distribute the content to each in turn
for host in slave1 slave2
do
  # Distribute each item in turn
  for file in "$@"
  do
    # Check whether the file exists
    if [ -e "$file" ]
    then
      # 1. Resolve the real parent directory (-P follows symlinks)
      pdir=$(cd -P "$(dirname "$file")"; pwd)
      # 2. Get the file name
      fname=$(basename "$file")
      # 3. Create the same directory structure on the target host
      ssh "$host" "mkdir -p $pdir"
      # 4. Distribute the file or directory
      rsync -av "$pdir/$fname" "$host:$pdir"
    else
      echo "$file does not exist"
      exit
    fi
  done
done
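Since the script is invoked below as my_rsync.sh, it presumably needs to be saved under that name in a directory on the PATH and made executable; a sketch assuming ~/bin (an assumed location) is on the user's PATH:

mkdir -p ~/bin
vim ~/bin/my_rsync.sh      # paste the script above
chmod +x ~/bin/my_rsync.sh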

Distribute the Hadoop and JDK files:

[nby993@master ~]$ my_rsync.sh /opt/software/hadoop-3.1.3.tar.gz 


[nby993@master ~]$ my_rsync.sh /usr/local/src/jdk1.8.0_212

13. Plan the cluster

       master               slave1                        slave2
HDFS   NameNode, DataNode   DataNode                      SecondaryNameNode, DataNode
YARN   NodeManager          ResourceManager, NodeManager  NodeManager

14. Configure Hadoop

Daemon                   App                             Hadoop 2.x port     Hadoop 3.x port
NameNode Port            Hadoop HDFS NameNode            8020 (HA) / 9000    9820
                         Hadoop HDFS NameNode HTTP UI    50070               9870
Secondary NameNode Port  Secondary NameNode              50091               9869
                         Secondary NameNode HTTP UI      50090               9868
DataNode Port            Hadoop HDFS DataNode IPC        50020               9867
                         Hadoop HDFS DataNode            50010               9866
                         Hadoop HDFS DataNode HTTP UI    50075               9864

hadoop-env.sh

export JAVA_HOME=/usr/local/src/jdk1.8.0_212

For Hadoop 3.x, JAVA_HOME must be set explicitly here.

core-site.xml

<configuration>

    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://master:9820</value>
    </property>

    <property>
        <name>hadoop.tmp.dir</name>
        <value>/usr/local/src/hadoop/data</value>
    </property>

    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>master</value>
    </property>

    <property>
        <name>hadoop.proxyuser.master.hosts</name>
        <value>*</value>
    </property>

    <property>
        <name>hadoop.proxyuser.master.groups</name>
        <value>*</value>
    </property>

</configuration>

hdfs-site.xml

<configuration>

    <property>
        <name>dfs.namenode.http-address</name>
        <value>master:9870</value>
    </property>

    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>slave2:9868</value>
    </property>

</configuration>

yarn-site.xml

<configuration>

    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>

    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>slave1</value>
    </property>

    <property>
        <name>yarn.nodemanager.env-whitelist</name>
        <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
    </property>

    <property>
        <name>yarn.scheduler.minimum-allocation-mb</name>
        <value>512</value>
    </property>

    <property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>4096</value>
    </property>

    <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>4096</value>
    </property>

    <property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>false</value>
    </property>

    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
    </property>

</configuration>

mapred-site.xml

<configuration>

    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>

</configuration>
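The files above were edited on master only; before formatting, they presumably also need to be pushed to slave1 and slave2, for example with the distribution script from step 12 (etc/hadoop is Hadoop's default configuration directory):

[nby993@master ~]$ my_rsync.sh /usr/local/src/hadoop-3.1.3/etc/hadoop/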

15. Start the cluster

Format the cluster on the NameNode machine (master):

hdfs namenode  -format
Afterwards, the data and logs directories are created under the Hadoop directory.

Start daemons node by node

// Start the NameNode on master
[nby993@master hadoop-3.1.3]$ hdfs --daemon start namenode
[nby993@master hadoop-3.1.3]$ jps
2144 NameNode
2210 Jps

// Start the DataNode on all three machines
[nby993@master hadoop-3.1.3]$ hdfs --daemon start datanode
[nby993@master hadoop-3.1.3]$ jps
2144 NameNode
2260 DataNode
2292 Jps
[nby993@slave1 ~]$ hdfs --daemon start datanode
WARNING: /usr/local/src/hadoop-3.1.3/logs does not exist. Creating.
[nby993@slave1 ~]$ jps
2658 Jps
2626 DataNode
[nby993@slave2 src]$ hdfs --daemon start datanode
WARNING: /usr/local/src/hadoop-3.1.3/logs does not exist. Creating.
[nby993@slave2 src]$ jps
9139 DataNode
9171 Jps


// Start the SecondaryNameNode on slave2
[nby993@slave2 src]$ hdfs --daemon start secondarynamenode
[nby993@slave2 src]$ jps
9265 SecondaryNameNode
9298 Jps
9139 DataNode

// Start the ResourceManager (on slave1)
[nby993@slave1 hadoop-3.1.3]$ yarn --daemon start resourcemanager
[nby993@slave1 hadoop-3.1.3]$ jps
2789 DataNode
2918 ResourceManager
2958 Jps

// Start the NodeManager on all three machines
[nby993@slave1 hadoop-3.1.3]$ yarn --daemon start nodemanager
[nby993@slave1 hadoop-3.1.3]$ jps
3186 NodeManager
2789 DataNode
2918 ResourceManager
3227 Jps
[nby993@master hadoop-3.1.3]$ yarn --daemon start nodemanager
[nby993@master hadoop-3.1.3]$ jps
2945 Jps
2473 NameNode
2588 DataNode
2879 NodeManager
[nby993@slave2 src]$ yarn --daemon start nodemanager
[nby993@slave2 src]$ jps
9265 SecondaryNameNode
9139 DataNode
9427 Jps
9359 NodeManager

16. Passwordless SSH login

Each node needs passwordless SSH access to all three nodes:
master->master,slave1,slave2

slave1->master,slave1,slave2

slave2->master,slave1,slave2

Generate a key pair
[nby993@master ~]$ ssh-keygen -t rsa -P ""
+--[ RSA 2048]----+
| .+o..o .        |
|  +=o= o         |
| o.== =          |
|..oo+E o         |
|.  o+ . S        |
|                 |
|                 |
|                 |
|                 |
+-----------------+
[nby993@master ~]$ ll .ssh/
-rw-------. 1 nby993 nby993 1679 8月   5 19:59 id_rsa      // private key
-rw-r--r--. 1 nby993 nby993  395 8月   5 19:59 id_rsa.pub  // public key

Authorize master, slave1, and slave2 (copy the public key, then test with ssh):
 ssh-copy-id master
 ssh master
 ssh-copy-id slave1
 ssh slave1
 ssh-copy-id slave2
 ssh slave2
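The same steps then have to be repeated on slave1 and slave2 so that every node can reach every other node; a compact sketch to run on each host:

ssh-keygen -t rsa -P ""
for host in master slave1 slave2
do
  ssh-copy-id $host
done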

17. Configure cluster-wide start/stop

[nby993@master hadoop]$ vim workers 
master
slave1
slave2

Distribute the workers file to slave1 and slave2, as shown below.
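A sketch using the my_rsync.sh script from step 12 (the path assumes Hadoop's default etc/hadoop configuration directory):

[nby993@master hadoop]$ my_rsync.sh /usr/local/src/hadoop-3.1.3/etc/hadoop/workers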

[nby993@master hadoop]$ start-dfs.sh 
[nby993@slave1 hadoop-3.1.3]$ start-yarn.sh

[nby993@master hadoop]$ jps
3811 NameNode
4297 NodeManager
4426 Jps
3963 DataNode
[nby993@slave1 hadoop-3.1.3]$ jps
4996 Jps
3846 DataNode
4024 ResourceManager
4890 NodeManager
[nby993@slave2 src]$ jps
10993 NodeManager
10821 SecondaryNameNode
10742 DataNode
11129 Jps

Group start/stop script:

#!/bin/bash

if [ $# -lt 1 ]
then
  echo "Arguments must not be empty"
  exit
fi

case $1 in
"start")
  # Start HDFS
  echo "================== Starting HDFS ================"
  ssh master /usr/local/src/hadoop-3.1.3/sbin/start-dfs.sh
  # Start YARN
  echo "================== Starting YARN ================"
  ssh slave1 /usr/local/src/hadoop-3.1.3/sbin/start-yarn.sh
;;

"stop")
  # Stop HDFS
  echo "================== Stopping HDFS ================"
  ssh master /usr/local/src/hadoop-3.1.3/sbin/stop-dfs.sh
  # Stop YARN
  echo "================== Stopping YARN ================"
  ssh slave1 /usr/local/src/hadoop-3.1.3/sbin/stop-yarn.sh
;;

*)
  echo "Invalid argument"
  exit
;;
esac
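Assuming the script is saved as my_cluster.sh (the name is an assumption) in a PATH directory and made executable, usage would look like:

chmod +x ~/bin/my_cluster.sh
my_cluster.sh start
my_cluster.sh stop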

18. Configure the history server

This is the history server for MapReduce jobs.

Add the following to mapred-site.xml:
 

<property>
    <name>mapreduce.jobhistory.address</name>
    <value>master:10020</value>
</property>

<property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>master:19888</value>
</property>

Start the history server

[nby993@master hadoop]$ mapred --daemon start historyserver
[nby993@master hadoop]$ jps
5667 NameNode
6261 JobHistoryServer // the history server
5820 DataNode
6285 Jps
6095 NodeManager

Update the cluster start/stop script:

#!/bin/bash

if [ $# -lt 1 ]
then
  echo "Arguments must not be empty"
  exit
fi

case $1 in
"start")
  # Start HDFS
  echo "================== Starting HDFS ================"
  ssh master /usr/local/src/hadoop-3.1.3/sbin/start-dfs.sh
  # Start YARN
  echo "================== Starting YARN ================"
  ssh slave1 /usr/local/src/hadoop-3.1.3/sbin/start-yarn.sh
  # Start the MR history server
  echo "================== Starting the MR history server ================"
  ssh master mapred --daemon start historyserver
;;

"stop")
  # Stop HDFS
  echo "================== Stopping HDFS ================"
  ssh master /usr/local/src/hadoop-3.1.3/sbin/stop-dfs.sh
  # Stop YARN
  echo "================== Stopping YARN ================"
  ssh slave1 /usr/local/src/hadoop-3.1.3/sbin/stop-yarn.sh
  # Stop the MR history server
  echo "================== Stopping the MR history server ================"
  ssh master mapred --daemon stop historyserver
;;

*)
  echo "Invalid argument"
  exit
;;
esac

19. Configure log aggregation

Aggregate MR job logs so they can be viewed in the web UI.

Add the following to yarn-site.xml:



<property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
</property>

<property>
    <name>yarn.log.server.url</name>
    <value>http://master:19888/jobhistory/logs</value>
</property>

<property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
</property>
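For these settings to take effect, the modified yarn-site.xml presumably has to be distributed to all nodes, and YARN plus the history server restarted; a sketch using the commands shown earlier:

my_rsync.sh /usr/local/src/hadoop-3.1.3/etc/hadoop/yarn-site.xml
ssh slave1 /usr/local/src/hadoop-3.1.3/sbin/stop-yarn.sh
mapred --daemon stop historyserver
ssh slave1 /usr/local/src/hadoop-3.1.3/sbin/start-yarn.sh
mapred --daemon start historyserver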


20. Configure time synchronization

0. Check the ntpd service status and whether it starts at boot on all nodes
[nby993@master hadoop-3.1.3]$ sudo systemctl status ntpd
● ntpd.service - Network Time Service    // stopped
   Loaded: loaded (/usr/lib/systemd/system/ntpd.service; disabled; vendor preset: disabled)
   Active: inactive (dead)
[nby993@master hadoop-3.1.3]$ sudo systemctl is-enabled ntpd
disabled    // not started at boot

1. Make master the time server; edit ntp.conf
vim /etc/ntp.conf
Comment out the internet time servers (the cluster runs on a LAN and will not use internet time):
#server 0.centos.pool.ntp.org iburst
#server 1.centos.pool.ntp.org iburst
#server 2.centos.pool.ntp.org iburst
#server 3.centos.pool.ntp.org iburst
Uncomment/add this line so all machines on the subnet can query and synchronize time from this host:
restrict 192.168.1.0 mask 255.255.255.0 nomodify notrap

2. If the node loses network connectivity, it can still serve time from its local clock:
server 127.127.1.0
fudge 127.127.1.0 stratum 10

3. Keep the hardware clock in sync with the system clock
[nby993@master hadoop-3.1.3]$ sudo vim /etc/sysconfig/ntpd
Add:
SYNC_HWCLOCK=yes

4. Restart the time service and enable it at boot
[nby993@master hadoop-3.1.3]$ sudo systemctl start ntpd
[nby993@master hadoop-3.1.3]$ sudo systemctl enable ntpd
Created symlink from /etc/systemd/system/multi-user.target.wants/ntpd.service to /usr/lib/systemd/system/ntpd.service.

5. On slave1 and slave2, add a cron job that synchronizes with the time server every 10 minutes
[nby993@slave1 ~]$ sudo crontab -e
    */10 * * * * /usr/sbin/ntpdate master
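To verify, the slave clock can be set to a wrong time and then checked again after the next sync (the date below is only an example):

sudo date -s "2021-01-01 11:11:11"
date                           # shows the wrong time
sudo /usr/sbin/ntpdate master  # or wait for the 10-minute cron job
date                           # back in sync with master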

