[root@spark1 soft]# vim /etc/profile
#JDK环境变量配置
#export JAVA_HOME=/application/jdk1.7.0_79
export JAVA_HOME=/application/jdk1.8.0_172
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JRE_HOME/lib/rt.jar
export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin
环境变量生效
[root@spark1 soft]# source /etc/profile
[root@spark1 soft]# java -version
openjdk version "1.8.0_121"
OpenJDK Runtime Environment (build 1.8.0_121-b13)
OpenJDK 64-Bit Server VM (build 25.121-b13, mixed mode)
[root@spark1 soft]#
$ ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
$ cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
验证ssh,# ssh localhost
不需要输入密码即可登录。
下载地址:
https://www.apache.org/dyn/closer.cgi/hadoop/common/
https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-2.6.5/hadoop-2.6.5.tar.gz
[root@spark1 soft]# tar -zxvf hadoop-2.6.5.tar.gz -C /application/
在/root /hadoop/目录下,建立tmp、hdfs/name、hdfs/data目录,执行如下命令
#mkdir /root/hadoop/tmp
#mkdir /root/hadoop/hdfs
#mkdir /root/hadoop/hdfs/data
#mkdir /root/hadoop/hdfs/name
#Hadoop环境变量配置
export HADOOP_HOME=/application/hadoop-2.6.5
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
[root@spark1 soft]# source /etc/profile
进入$HADOOP_HOME/etc/hadoop目录,配置 hadoop-env.sh等。涉及的配置文件如下:
1)hadoop-2.6.5/etc/hadoop/hadoop-env.sh
2)hadoop-2.6.5/etc/hadoop/yarn-env.sh
3)hadoop-2.6.5/etc/hadoop/core-site.xml
4)hadoop-2.6.5/etc/hadoop/hdfs-site.xml
5)hadoop-2.6.5/etc/hadoop/mapred-site.xml
6)hadoop-2.6.5/etc/hadoop/yarn-site.xml
# The java implementation to use.
#export JAVA_HOME=${JAVA_HOME}
export JAVA_HOME=/application/jdk1.8.0_172
# some Java parameters
# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
export JAVA_HOME=/application/jdk1.8.0_172
添加如下配置:
[root@spark1 hadoop]# cat core-site.xml
fs.default.name
hdfs://spark1:9000
HDFS的URI,文件系统://namenode标识:端口号
hadoop.tmp.dir
/root/hadoop/tmp
namenode上本地的hadoop临时文件夹
[root@spark1 hadoop]#
[root@spark1 hadoop]# cat hdfs-site.xml
dfs.name.dir
/root/hadoop/hdfs/name
namenode上存储hdfs名字空间元数据
dfs.data.dir
/root/hadoop/hdfs/data
datanode上数据块的物理存储位置
dfs.replication
1
副本个数,配置默认是3,应小于datanode机器数量
[root@spark1 hadoop]#
[root@spark1 hadoop]# cat mapred-site.xml
mapreduce.framework.name
yarn
[root@spark1 hadoop]#
[root@spark1 hadoop]# cat yarn-site.xml
yarn.nodemanager.aux-services
mapreduce_shuffle
yarn.resourcemanager.webapp.address
${yarn.resourcemanager.hostname}:8099
[root@spark1 hadoop]#
说明:
1)默认端口是8088;
2)这里我设置了yarn.resourcemanager.webapp.address为:${yarn.resourcemanager.hostname}:8099;
hadoop namenode -format
start-dfs.sh
start-yarn.sh
1)执行jps命令,有如下进程,说明Hadoop正常启动
[root@spark1 soft]# jps
5649 DataNode
6631 ResourceManager
5815 SecondaryNameNode
5527 NameNode
6728 NodeManager
7981 Jps
[root@spark1 soft]#
2)访问hdfs
http://192.168.2.191:50070
3)在浏览器中输入:http://192.168.2.191:8099/cluster 即可看到YARN的ResourceManager的界面。
注意:默认端口是8088,这里我设置了yarn.resourcemanager.webapp.address为:${yarn.resourcemanager.hostname}:8099
[root@spark1 hadoop]# hadoop jar /application/hadoop-2.6.5/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.5.jar pi 2 100
Number of Maps = 2
Samples per Map = 100
19/04/13 13:46:49 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Wrote input for Map #0
Wrote input for Map #1
Starting Job
19/04/13 13:46:51 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
19/04/13 13:46:52 INFO input.FileInputFormat: Total input paths to process : 2
19/04/13 13:46:52 INFO mapreduce.JobSubmitter: number of splits:2
19/04/13 13:46:52 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1555134174372_0001
19/04/13 13:46:53 INFO impl.YarnClientImpl: Submitted application application_1555134174372_0001
19/04/13 13:46:53 INFO mapreduce.Job: The url to track the job: http://spark1:8099/proxy/application_1555134174372_0001/
19/04/13 13:46:53 INFO mapreduce.Job: Running job: job_1555134174372_0001
19/04/13 13:47:00 INFO mapreduce.Job: Job job_1555134174372_0001 running in uber mode : false
19/04/13 13:47:00 INFO mapreduce.Job: map 0% reduce 0%
19/04/13 13:47:14 INFO mapreduce.Job: map 100% reduce 0%
19/04/13 13:47:19 INFO mapreduce.Job: map 100% reduce 100%
19/04/13 13:47:19 INFO mapreduce.Job: Job job_1555134174372_0001 completed successfully
19/04/13 13:47:19 INFO mapreduce.Job: Counters: 49
File System Counters
FILE: Number of bytes read=50
FILE: Number of bytes written=322803
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=522
HDFS: Number of bytes written=215
HDFS: Number of read operations=11
HDFS: Number of large read operations=0
HDFS: Number of write operations=3
Job Counters
Launched map tasks=2
Launched reduce tasks=1
Data-local map tasks=2
Total time spent by all maps in occupied slots (ms)=23209
Total time spent by all reduces in occupied slots (ms)=2996
Total time spent by all map tasks (ms)=23209
Total time spent by all reduce tasks (ms)=2996
Total vcore-milliseconds taken by all map tasks=23209
Total vcore-milliseconds taken by all reduce tasks=2996
Total megabyte-milliseconds taken by all map tasks=23766016
Total megabyte-milliseconds taken by all reduce tasks=3067904
Map-Reduce Framework
Map input records=2
Map output records=4
Map output bytes=36
Map output materialized bytes=56
Input split bytes=286
Combine input records=0
Combine output records=0
Reduce input groups=2
Reduce shuffle bytes=56
Reduce input records=4
Reduce output records=0
Spilled Records=8
Shuffled Maps =2
Failed Shuffles=0
Merged Map outputs=2
GC time elapsed (ms)=2514
CPU time spent (ms)=12980
Physical memory (bytes) snapshot=697511936
Virtual memory (bytes) snapshot=6333603840
Total committed heap usage (bytes)=499646464
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=236
File Output Format Counters
Bytes Written=97
Job Finished in 28.254 seconds
Estimated value of Pi is 3.12000000000000000000
[root@spark1 hadoop]#
==============================
QQ群:143522604
群里有相关资源
欢迎和大家一起学习、交流、提升!
==============================