#################centos7###############
#########hadoop-2.5.0-cdh5.3.6伪分布式#########
#JDK
JAVA_HOME=/opt/app/jdk1.8.0_171
JRE_HOME=/opt/app/jdk1.8.0_171/jre
CLASS_PATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JRE_HOME/lib
PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin
export JAVA_HOME JRE_HOME CLASS_PATH PATH
#/etc/hostname(update)
centos11.com
#/etc/hosts(add)
192.168.113.136 centos10.com
192.168.113.133 centos11.com
192.168.113.135 centos12.com
--------------------1--------------------
etc/hadoop/hadoop-env.sh(replace)
export JAVA_HOME=/opt/app/jdk1.8.0_171
#test
mkdir input
cp etc/hadoop/*.xml input
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0-cdh5.3.6.jar grep input output 'dfs[a-z.]+'
cat output/p*
--------------------2.hdfs--------------------
#mkdir temp
cd /opt/app/hadoop-2.5.0-cdh5.3.6
mkdir -p data/temp
#NameNode
etc/hadoop/core-site.xml:
etc/hadoop/hdfs-site.xml:
#format
bin/hdfs namenode -format
sbin/hadoop-daemon.sh start namenode
sbin/hadoop-daemon.sh start datanode
sbin/hadoop-daemon.sh start secondarynamenode
#Windows hosts file (add)
192.168.113.136 centos10.com
192.168.113.133 centos11.com
192.168.113.135 centos12.com
#cat
jps
http://centos11.com:50070
--------------------3.yarn--------------------
etc/hadoop/hadoop-env.sh(replace)
export JAVA_HOME=/opt/app/jdk1.8.0_171
etc/hadoop/yarn-site.xml:
#DataNodes,NodeManager
slaves文件中;
centos11.com
sbin/yarn-daemon.sh start resourcemanager
sbin/yarn-daemon.sh start nodemanager
#cat
jps
http://centos11.com:8088
http://centos11.com:8042
--------------------4.mapreduce--------------------
etc/hadoop/mapred-env.sh(replace)
export JAVA_HOME=/opt/app/jdk1.8.0_171
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0-cdh5.3.6.jar wordcount /user/centos/wordcount/input /user/centos/wordcount/output
#历史服务器
sbin/mr-jobhistory-daemon.sh start historyserver
http://centos11.com:19888
Hadoop2.5
1.COMMON
2.HDFS
存储数据
NameNode(存储文件系统元数据,命名空间)
DataNode(存储数据)
SecondaryNameNode(辅助NameNode工作,定时合并文件)
3.YARN
Hadoop操作系统
Data操作系统
Container
ResourceManager(整个集群资源的管理和调度)
NodeManager(管理每个节点的资源和调度)
4.MapReduce
Map分
Reduce合
input->map->shuffle->reduce->output
分布式并行的计算框架
运行模式:
本地模式
yarn模式
启动hdfs和yarn方式
1.逐一启动
hadoop-daemon.sh,yarn-daemon.sh
2.分开启动
start-dfs.sh,start-yarn.sh
3.一起启动
start-all.sh
hdfs
#NameNode
etc/hadoop/core-site.xml:
#DataNode
slaves:
centos11.com
#SecondaryNameNode
etc/hadoop/hdfs-site.xml:
yarn
#ResourceManager
etc/hadoop/yarn-site.xml:
#NodeManager
slaves:
centos11.com
MapReduce
#historyserver
etc/hadoop/mapred-site.xml:
################################################################################
#/etc/hosts(add)
192.168.113.139 centos10.com
192.168.113.137 centos11.com
192.168.113.135 centos12.com
#slaves
centos10.com
centos11.com
centos12.com
centos11 centos12 centos10
HDFS
NameNode
DataNode DataNode DataNode
SecondaryNameNode
YARN
ResourceManager
NodeManager NodeManager NodeManager
MapReduce
JobHistoryServer
cd ~/.ssh
ssh-keygen -t rsa
ssh-copy-id -i ~/.ssh/id_rsa.pub user@server (-i:指定公钥文件)
#测试免密码登录
ssh user@server
#启动
bin/hdfs namenode -format
sbin/start-dfs.sh,sbin/start-yarn.sh
#基本测试
bin/hdfs dfs -mkdir -p /user/centos/temp/xml
bin/hdfs dfs -put etc/hadoop/*.xml /user/centos/temp/xml
bin/hdfs dfs -text /user/centos/temp/xml/core-site.xml