Fully Distributed Mode
• Hadoop's greatest strength is distributed cluster computing, so production
deployments use the last of the three modes: fully distributed mode.
Host         | Role                                                 | Software
192.168.6.10 | master: NameNode, SecondaryNameNode, ResourceManager | HDFS, YARN
192.168.6.11 | node1: DataNode, NodeManager                         | HDFS, YARN
192.168.6.12 | node2: DataNode, NodeManager                         | HDFS, YARN
192.168.6.13 | node3: DataNode, NodeManager                         | HDFS, YARN
1. Install the operating system
– Note: enable only the necessary services and disable every unrelated system
service (minimal system, minimal services)
– Note: disable the system firewall
– Note: disable SELinux (a CentOS 7 sketch of both steps follows this list)
– Verify firewall rules: iptables-save
– Verify SELinux status: sestatus
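On CentOS 7 (which these transcripts appear to use, given systemctl,
hostnamectl and yum), a minimal sketch of the firewall and SELinux steps could
look like the following; the firewalld service name is an assumption if your
distribution differs:

# stop the firewall now and keep it off after reboot
systemctl stop firewalld
systemctl disable firewalld
# switch SELinux to permissive immediately ...
setenforce 0
# ... and disable it permanently (takes effect on next boot)
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
# verify: iptables-save should print no rules, sestatus should report disabled
iptables-save
sestatus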
2. Install the Java environment and the jps debugging tool on all machines
– Note: keep the OS version and the Java version identical across all machines
– Note: keep all installation paths identical (an install sketch follows)
– Verify: java -version
– Verify: jps
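The transcripts later install OpenJDK 1.8 from yum when preparing newnode;
presumably the same package was installed here on every machine, along these
lines:

# OpenJDK 1.8 with development tools (the -devel package provides jps)
yum -y install java-1.8.0-openjdk-devel
# confirm identical versions on every host
java -version
jps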
3. Configure hostname-to-IP mappings in /etc/hosts (example below)
– Note: the file must be updated on every host
– Note: the master must be able to ping every host
– Note: every node must be able to ping the master
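Based on the /etc/hosts contents shown later in these notes, the mapping for
this cluster is:

192.168.6.10    nn01     # master: NameNode, SecondaryNameNode, ResourceManager
192.168.6.11    node1    # DataNode, NodeManager
192.168.6.12    node2    # DataNode, NodeManager
192.168.6.13    node3    # DataNode, NodeManager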
[root@nn01 hadoop]# vim mapred-site.xml
[root@nn01 hadoop]# vim yarn-site.xml
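The notes do not show the contents of these two files. For a minimal Hadoop 2.7
cluster (building on the core-site.xml and hdfs-site.xml presumably configured
earlier), the standard entries are sketched below, with nn01 as the
ResourceManager host per the table above:

mapred-site.xml:
<configuration>
    <!-- run MapReduce jobs on YARN rather than the local runner -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>

yarn-site.xml:
<configuration>
    <!-- where the NodeManagers find the ResourceManager -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>nn01</value>
    </property>
    <!-- auxiliary shuffle service required by MapReduce -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>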
[root@nn01 bin]# ./rrr node{1..3}
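rrr is a local helper script, not a Hadoop tool; judging from the rsync loop
used later for /etc/hosts, it presumably pushes the Hadoop tree to the named
hosts. A hypothetical reconstruction:

#!/bin/bash
# rrr (hypothetical): sync the Hadoop installation to each host named as an argument
for host in "$@"; do
    rsync -a --delete /usr/local/hadoop ${host}:/usr/local/
done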
[root@nn01 hadoop]# ./sbin/start-yarn.sh
[root@nn01 hadoop]# ./sbin/start-dfs.sh
[root@nn01 hadoop]# jps
1456 SecondaryNameNode
1266 NameNode
1575 Jps
863 ResourceManager
[root@node1 ~]# jps
855 NodeManager
1065 Jps
989 DataNode
[root@node2 ~]# jps
1523 Jps
1447 DataNode
1323 NodeManager
[root@node3 ~]# jps
866 NodeManager
1065 Jps
989 DataNode
[root@nn01 hadoop]# ./bin/yarn node -list
18/08/02 10:41:11 INFO client.RMProxy: Connecting to ResourceManager at nn01/192.168.6.10:8032
Total Nodes:3
Node-Id Node-State Node-Http-Address Number-of-Running-Containers
node2:33740 RUNNING node2:8042 0
node3:35863 RUNNING node3:8042 0
node1:36747 RUNNING node1:8042 0
[root@nn01 hadoop]# ss -tunlp
Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port
udp UNCONN 0 0 127.0.0.1:323 *:* users:(("chronyd",pid=472,fd=1))
udp UNCONN 0 0 ::1:323 :::* users:(("chronyd",pid=472,fd=2))
tcp LISTEN 0 128 192.168.6.10:50070 *:* users:(("java",pid=1266,fd=193))
tcp LISTEN 0 128 *:22 *:* users:(("sshd",pid=675,fd=3))
tcp LISTEN 0 100 127.0.0.1:25 *:* users:(("master",pid=773,fd=13))
tcp LISTEN 0 128 192.168.6.10:9000 *:* users:(("java",pid=1266,fd=204))
tcp LISTEN 0 128 192.168.6.10:50090 *:* users:(("java",pid=1456,fd=198))
tcp LISTEN 0 128 :::22 :::* users:(("sshd",pid=675,fd=4))
tcp LISTEN 0 128 ::ffff:192.168.6.10:8088 :::* users:(("java",pid=863,fd=227))
tcp LISTEN 0 100 ::1:25 :::* users:(("master",pid=773,fd=14))
tcp LISTEN 0 128 ::ffff:192.168.6.10:8030 :::* users:(("java",pid=863,fd=207))
tcp LISTEN 0 128 ::ffff:192.168.6.10:8031 :::* users:(("java",pid=863,fd=196))
tcp LISTEN 0 128 ::ffff:192.168.6.10:8032 :::* users:(("java",pid=863,fd=217))
tcp LISTEN 0 128 ::ffff:192.168.6.10:8033 :::* users:(("java",pid=863,fd=232))
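For reference, the listening ports above are the stock Hadoop 2.x defaults
(port roles stated from general Hadoop knowledge, not from these notes):

# 50070  NameNode web UI                  (java pid 1266)
# 9000   NameNode RPC, fs.defaultFS       (java pid 1266)
# 50090  SecondaryNameNode web UI         (java pid 1456)
# 8088   ResourceManager web UI           (java pid 863)
# 8030   ResourceManager scheduler
# 8031   ResourceManager resource tracker (NodeManager heartbeats)
# 8032   ResourceManager client RPC       (used by yarn node -list above)
# 8033   ResourceManager admin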
[root@nn01 hadoop]# ./bin/hadoop
Usage: hadoop [--config confdir] [COMMAND | CLASSNAME]
  CLASSNAME            run the class named CLASSNAME
 or
  where COMMAND is one of:
  fs                   run a generic filesystem user client
  version              print the version
  jar <jar>            run a jar file
                       note: please use "yarn jar" to launch
                            YARN applications, not this command.
  checknative [-a|-h]  check native hadoop and compression libraries availability
  distcp <srcurl> <desturl> copy file or directories recursively
  archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive
  classpath            prints the class path needed to get the
                       Hadoop jar and the required libraries
  credential           interact with credential providers
  daemonlog            get/set the log level for each daemon
  trace                view and modify Hadoop tracing settings

Most commands print help when invoked w/o parameters.
HDFS Basic Usage
• Basic HDFS commands
– ./bin/hadoop fs -ls /
– equivalent shell command: ls /
– ./bin/hadoop fs -mkdir /abc
– equivalent shell command: mkdir /abc
– ./bin/hadoop fs -rmdir /abc
– equivalent shell command: rmdir /abc
[root@nn01 hadoop]# ./bin/hadoop fs    // list the available options
Usage: hadoop fs [generic options]
	[-appendToFile <localsrc> ... <dst>]
	[-cat [-ignoreCrc] <src> ...]
	[-checksum <src> ...]
	[-chgrp [-R] GROUP PATH...]
	[-chmod [-R] <MODE[,MODE]... | OCTALMODE> PATH...]
	[-chown [-R] [OWNER][:[GROUP]] PATH...]
	[-copyFromLocal [-f] [-p] [-l] <localsrc> ... <dst>]
	[-copyToLocal [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]
	[-count [-q] [-h] <path> ...]
	[-cp [-f] [-p | -p[topax]] <src> ... <dst>]
	[-createSnapshot <snapshotDir> [<snapshotName>]]
	[-deleteSnapshot <snapshotDir> <snapshotName>]
	[-df [-h] [<path> ...]]
	[-du [-s] [-h] <path> ...]
	[-expunge]
	[-find <path> ... <expression> ...]
	[-get [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]
	[-getfacl [-R] <path>]
	[-getfattr [-R] {-n name | -d} [-e en] <path>]
	[-getmerge [-nl] <src> <localdst>]
	[-help [cmd ...]]
	[-ls [-d] [-h] [-R] [<path> ...]]
	[-mkdir [-p] <path> ...]
	[-moveFromLocal <localsrc> ... <dst>]
	[-moveToLocal <src> <localdst>]
	[-mv <src> ... <dst>]
	[-put [-f] [-p] [-l] <localsrc> ... <dst>]
	[-renameSnapshot <snapshotDir> <oldName> <newName>]
	[-rm [-f] [-r|-R] [-skipTrash] <src> ...]
	[-rmdir [--ignore-fail-on-non-empty] <dir> ...]
	[-setfacl [-R] [{-b|-k} {-m|-x <acl_spec>} <path>]|[--set <acl_spec> <path>]]
	[-setfattr {-n name [-v value] | -x name} <path>]
	[-setrep [-R] [-w] <rep> <path> ...]
	[-stat [format] <path> ...]
	[-tail [-f] <file>]
	[-test -[defsz] <path>]
	[-text [-ignoreCrc] <src> ...]
	[-touchz <path> ...]
	[-truncate [-w] <length> <path> ...]
	[-usage [cmd ...]]

Generic options supported are
-conf <configuration file>     specify an application configuration file
-D <property=value>            use value for given property
-fs <local|namenode:port>      specify a namenode
-jt <local|resourcemanager:port>    specify a ResourceManager
-files <comma separated list of files>    specify comma separated files to be copied to the map reduce cluster
-libjars <comma separated list of jars>    specify comma separated jar files to include in the classpath.
-archives <comma separated list of archives>    specify comma separated archives to be unarchived on the compute machines.

The general command line syntax is
bin/hadoop command [genericOptions] [commandOptions]
[root@nn01 hadoop]# ./bin/hadoop fs -ls /
[root@nn01 hadoop]# ./bin/hadoop fs -mkdir /aaa
[root@nn01 hadoop]# ./bin/hadoop fs -ls /aaa
[root@nn01 hadoop]# ./bin/hadoop fs -touchz /f1
[root@nn01 hadoop]# ls
bin etc include lib libexec LICENSE.txt logs NOTICE.txt oo README.txt sbin share xx xx1
[root@nn01 hadoop]# ./bin/hadoop fs -put *.txt /aaa/
[root@nn01 hadoop]# ./bin/hadoop fs -ls /
Found 2 items
drwxr-xr-x - root supergroup 0 2018-08-02 11:10 /aaa
-rw-r--r-- 2 root supergroup 0 2018-08-02 11:10 /f1
[root@nn01 hadoop]# ./bin/hadoop fs -get /f1 ./f2
[root@nn01 hadoop]# ./bin/hadoop fs -rm /f1
18/08/02 11:12:42 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 0 minutes, Emptier interval = 0 minutes.
Deleted /f1
[root@nn01 hadoop]# ./bin/hadoop fs -ls /
Found 1 items
drwxr-xr-x - root supergroup 0 2018-08-02 11:10 /aaa
[root@nn01 hadoop]# ./bin/hadoop fs -ls -abc /
-ls: Illegal option -abc
Usage: hadoop fs [generic options] -ls [-d] [-h] [-R] [<path> ...]
// word-frequency ("hot word") analysis
[root@nn01 hadoop]# ./bin/hadoop fs -mkdir /oo
[root@nn01 hadoop]# ./bin/hadoop fs -put *.txt /oo/
[root@nn01 hadoop]# ./bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.6.jar wordcount /oo /xx
[root@nn01 hadoop]# ./bin/hadoop fs -ls /xx
[root@nn01 hadoop]# ./bin/hadoop fs -cat /xx/part-r-00000
Adding or repairing a node
– Install a machine the same way as in the standalone setup and deploy the
Java runtime environment
– Copy the Hadoop files from the namenode to this machine
– Start the datanode on the new node:
./sbin/hadoop-daemon.sh start datanode
– Set the balancer bandwidth (in bytes per second, here about 60 MB/s):
./bin/hdfs dfsadmin -setBalancerBandwidth 60000000
– Rebalance the blocks across datanodes:
./sbin/start-balancer.sh
[root@node4 ~]# hostnamectl set-hostname newnode
[root@node3 ~]# scp -r /etc/yum.repos.d/local.repo 192.168.6.14:/etc/yum.repos.d/
[root@nn01 hadoop]# pwd
/usr/local/hadoop
[root@nn01 ~]# cd /root/.ssh/
[root@nn01 .ssh]# ls
authorized_keys id_rsa key.pub known_hosts
[root@nn01 .ssh]#
[root@nn01 .ssh]# mv key.pub id_rsa.pub
[root@nn01 .ssh]# ssh-copy-id -i id_rsa.pub 192.168.6.14
/usr/bin/ssh-copy-id: INFO: Source of key(s) to be installed: "id_rsa.pub"
/usr/bin/ssh-copy-id: INFO: attempting to log in with the new key(s), to filter out any that are already installed
/usr/bin/ssh-copy-id: INFO: 1 key(s) remain to be installed -- if you are prompted now it is to install the new keys
[email protected]'s password:
Number of key(s) added: 1
Now try logging into the machine, with: "ssh '192.168.6.14'"
and check to make sure that only the key(s) you wanted were added.
[root@nn01 .ssh]# vim /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.6.10 nn01 namenode,secondarynamenode
192.168.6.14 newnode
192.168.6.11 node1 datanode
192.168.6.12 node2 datanode
192.168.6.13 node3 datanode
On newnode (before syncing, /etc/hosts still contains only the defaults):
[root@newnode ~]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
[root@newnode ~]# yum provides rsync
Loaded plugins: fastestmirror
Determining fastest mirrors
rsync-3.0.9-18.el7.x86_64 : A program for synchronizing files over a network
Repo        : local_source
[root@newnode ~]# yum -y install rsync-3.0.9-18.el7.x86_64
Sync /etc/hosts to all nodes:
[root@nn01 .ssh]# for i in node{1..3} newnode;do rsync -av /etc/hosts ${i}:/etc/; done
[root@newnode ~]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.6.10 nn01 namenode,secondarynamenode
192.168.6.14 newnode
192.168.6.11 node1 datanode
192.168.6.12 node2 datanode
192.168.6.13 node3 datanode
[root@newnode ~]# yum -y install java-1.8.0-openjdk-devel
[root@nn01 .ssh]# cd /usr/local/hadoop/
[root@nn01 hadoop]#
[root@nn01 hadoop]# vim etc/hadoop/slaves
node1
node2
node3
newnode
[root@nn01 bin]# ./rrr node{1..3}
[root@newnode ~]# rsync -a nn01:/usr/local/hadoop /usr/local/
[root@newnode ~]# ls /usr/local/
bin etc games hadoop include lib lib64 libexec sbin share src
[root@newnode ~]# cd /usr/local/
[root@newnode local]# cd hadoop/
[root@newnode hadoop]# ls
bin etc f2 include lib libexec LICENSE.txt logs NOTICE.txt oo README.txt sbin share xx xx1
[root@newnode hadoop]# rm -rf logs
[root@newnode hadoop]# mkdir /var/hadoop
[root@newnode hadoop]# jps
1097 Jps
[root@newnode hadoop]# ./sbin/hadoop-daemon.sh
Usage: hadoop-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] [--script script] (start|stop) <hadoop-command> <args...>
[root@newnode hadoop]# ./sbin/hadoop-daemon.sh start datanode
[root@newnode hadoop]# jps
1124 DataNode
1196 Jps
[root@nn01 bin]# cd ..
[root@nn01 hadoop]# pwd
/usr/local/hadoop
[root@nn01 hadoop]# ./bin/hdfs
Usage: hdfs [--config confdir] [--loglevel loglevel] COMMAND
where COMMAND is one of:
dfs run a filesystem command on the file systems supported in Hadoop.
classpath prints the classpath
namenode -format format the DFS filesystem
secondarynamenode run the DFS secondary namenode
namenode run the DFS namenode
journalnode run the DFS journalnode
zkfc run the ZK Failover Controller daemon
datanode run a DFS datanode
dfsadmin run a DFS admin client
haadmin run a DFS HA admin client
fsck run a DFS filesystem checking utility
balancer run a cluster balancing utility
jmxget get JMX exported values from NameNode or DataNode.
mover run a utility to move block replicas across
storage types
oiv apply the offline fsimage viewer to an fsimage
oiv_legacy apply the offline fsimage viewer to an legacy fsimage
oev apply the offline edits viewer to an edits file
fetchdt fetch a delegation token from the NameNode
getconf get config values from configuration
groups get the groups which users belong to
snapshotDiff diff two snapshots of a directory or diff the
current directory contents with a snapshot
lsSnapshottableDir list all snapshottable dirs owned by the current user
Use -help to see options
portmap run a portmap service
nfs3 run an NFS version 3 gateway
cacheadmin configure the HDFS cache
crypto configure HDFS encryption zones
storagepolicies list/get/set block storage policies
version print the version
Most commands print help when invoked w/o parameters.
[root@nn01 hadoop]# ./bin/hdfs dfsadmin
Usage: hdfs dfsadmin
Note: Administrative commands can only be run as the HDFS superuser.
	[-report [-live] [-dead] [-decommissioning]]
	[-safemode <enter | leave | get | wait>]
	[-saveNamespace]
	[-rollEdits]
	[-restoreFailedStorage true|false|check]
	[-refreshNodes]
	[-setQuota <quota> <dirname>...<dirname>]
	[-clrQuota <dirname>...<dirname>]
	[-setSpaceQuota <quota> [-storageType <storagetype>] <dirname>...<dirname>]
	[-clrSpaceQuota [-storageType <storagetype>] <dirname>...<dirname>]
	[-finalizeUpgrade]
	[-rollingUpgrade [<query|prepare|finalize>]]
	[-refreshServiceAcl]
	[-refreshUserToGroupsMappings]
	[-refreshSuperUserGroupsConfiguration]
	[-refreshCallQueue]
	[-refresh <host:ipc_port> <key> [arg1..argn]
	[-reconfig <datanode|...> <host:ipc_port> <start|status>]
	[-printTopology]
	[-refreshNamenodes datanode_host:ipc_port]
	[-deleteBlockPool datanode_host:ipc_port blockpoolId [force]]
	[-setBalancerBandwidth <bandwidth in bytes per second>]
	[-fetchImage <local directory>]
	[-allowSnapshot <snapshotDir>]
	[-disallowSnapshot <snapshotDir>]
	[-shutdownDatanode <datanode_host:ipc_port> [upgrade]]
	[-getDatanodeInfo <datanode_host:ipc_port>]
	[-metasave filename]
	[-triggerBlockReport [-incremental] <datanode_host:ipc_port>]
	[-help [cmd]]

Generic options supported are
-conf <configuration file>     specify an application configuration file
-D <property=value>            use value for given property
-fs <local|namenode:port>      specify a namenode
-jt <local|resourcemanager:port>    specify a ResourceManager
-files <comma separated list of files>    specify comma separated files to be copied to the map reduce cluster
-libjars <comma separated list of jars>    specify comma separated jar files to include in the classpath.
-archives <comma separated list of archives>    specify comma separated archives to be unarchived on the compute machines.

The general command line syntax is
bin/hadoop command [genericOptions] [commandOptions]
[root@nn01 hadoop]# ./bin/hdfs dfsadmin -setBalancerBandwidth 60000000
Balancer bandwidth is set to 60000000
[root@nn01 hadoop]# ./sbin/start-balancer.sh
starting balancer, logging to /usr/local/hadoop/logs/hadoop-root-balancer-nn01.out
Time Stamp Iteration# Bytes Already Moved Bytes Left To Move Bytes Being Moved
[root@nn01 hadoop]# ./bin/hdfs dfsadmin -report
Configured Capacity: 68673339392 (63.96 GB)
Present Capacity: 61698879488 (57.46 GB)
DFS Remaining: 61697900544 (57.46 GB)
DFS Used: 978944 (956 KB)
DFS Used%: 0.00%
Under replicated blocks: 0
Blocks with corrupt replicas: 0
Missing blocks: 0
Missing blocks (with replication factor 1): 0
-------------------------------------------------
Live datanodes (4):
Name: 192.168.6.11:50010 (node1)
Hostname: node1
Decommission Status : Normal
Configured Capacity: 17168334848 (15.99 GB)
DFS Used: 331776 (324 KB)
Non DFS Used: 1743495168 (1.62 GB)
DFS Remaining: 15424507904 (14.37 GB)
DFS Used%: 0.00%
DFS Remaining%: 89.84%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Thu Aug 02 12:42:31 CST 2018
Name: 192.168.6.13:50010 (node3)
Hostname: node3
Decommission Status : Normal
Configured Capacity: 17168334848 (15.99 GB)
DFS Used: 303104 (296 KB)
Non DFS Used: 1743405056 (1.62 GB)
DFS Remaining: 15424626688 (14.37 GB)
DFS Used%: 0.00%
DFS Remaining%: 89.84%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Thu Aug 02 12:42:32 CST 2018
Name: 192.168.6.14:50010 (newnode)
Hostname: newnode
Decommission Status : Normal
Configured Capacity: 17168334848 (15.99 GB)
DFS Used: 4096 (4 KB)
Non DFS Used: 1744244736 (1.62 GB)
DFS Remaining: 15424086016 (14.36 GB)
DFS Used%: 0.00%
DFS Remaining%: 89.84%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Thu Aug 02 12:42:34 CST 2018
Name: 192.168.6.12:50010 (node2)
Hostname: node2
Decommission Status : Normal
Configured Capacity: 17168334848 (15.99 GB)
DFS Used: 339968 (332 KB)
Non DFS Used: 1743314944 (1.62 GB)
DFS Remaining: 15424679936 (14.37 GB)
DFS Used%: 0.00%
DFS Remaining%: 89.84%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Thu Aug 02 12:42:32 CST 2018