sudo addgroup hadoop
sudo useradd -m -g hadoop hadoop
su hadoop
ssh-keygen -t rsa -P ""
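Hadoop's start scripts log in to localhost over SSH, so the new key also has to be authorized for password-less login. A minimal follow-up, assuming the default key path from the command above:

cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
ssh localhost    # should log in without prompting for a password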
cat /proc/sys/net/ipv6/conf/all/disable_ipv6
sudo vim /etc/sysctl.conf
net.ipv6.conf.all.disable_ipv6 = 1
net.ipv6.conf.default.disable_ipv6 = 1
net.ipv6.conf.lo.disable_ipv6 = 1
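To apply the new settings without rebooting, reload sysctl.conf and re-check the flag; it should now print 1:

sudo sysctl -p
cat /proc/sys/net/ipv6/conf/all/disable_ipv6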
# appended to /etc/profile
export JAVA_HOME=/software/devsoftware/jdk1.7.0_55
export PATH=$JAVA_HOME/bin:$PATH
export HADOOP_HOME=/home/hadoop/hadoop-2.5.2
export PATH=$HADOOP_HOME/bin:$PATH
source /etc/profile
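A quick sanity check that both variables took effect (the exact version strings will differ):

java -version
hadoop version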
# in etc/hadoop/hadoop-env.sh, set JAVA_HOME explicitly as well
export JAVA_HOME=/software/devsoftware/jdk1.7.0_55
Some of these configuration files reference directories that must be created by hand, and some require the machine's hostname to be set according to the actual environment; the hostname cannot be an IP address or localhost, and it must be mapped in /etc/hosts. One additional note: several write-ups suggest that 127.0.0.1 should be bound to only one hostname (the one Hadoop uses), with any other entries commented out. Whether this actually matters was not tested here; following that advice, only one hostname was kept.
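As an illustration only (hadoop-master is a placeholder; substitute the machine's real hostname), an /etc/hosts following that advice keeps a single 127.0.0.1 binding:

127.0.0.1       hadoop-master
# any further 127.0.0.1 entries commented out, per the note above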
<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <!-- This directory must be created manually -->
    <value>/home/hadoop/data/tmp</value>
    <description>A base for other temporary directories.</description>
  </property>
  <!-- file system properties -->
  <property>
    <name>fs.defaultFS</name>
    <!-- The HDFS service address; use the hostname, not an IP or localhost -->
    <value>hdfs://hostname:9000</value>
  </property>
  <property>
    <!-- Use Hadoop's bundled native (.so) libraries -->
    <name>hadoop.native.lib</name>
    <value>true</value>
    <description>Should native hadoop libraries, if present, be used.</description>
  </property>
</configuration>
cp mapred-site.xml.template mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <!-- "yarn" is all lowercase, not "Yarn" -->
    <value>yarn</value>
  </property>
</configuration>
<configuration>
  <!-- Site specific YARN configuration properties -->
  <property>
    <!-- Hadoop configuration keys are case-sensitive: "yarn" must be lowercase -->
    <name>yarn.nodemanager.aux-services</name>
    <!-- Note: mapreduce_shuffle, not mapreduce.shuffle -->
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <description>The address of the applications manager interface in the RM.</description>
    <name>yarn.resourcemanager.address</name>
    <!-- Replace hostname with the machine's actual hostname -->
    <value>hostname:18040</value>
  </property>
  <property>
    <description>The address of the scheduler interface.</description>
    <name>yarn.resourcemanager.scheduler.address</name>
    <!-- Replace hostname with the machine's actual hostname -->
    <value>hostname:18030</value>
  </property>
  <property>
    <description>The address of the RM web application.</description>
    <name>yarn.resourcemanager.webapp.address</name>
    <!-- Replace hostname with the machine's actual hostname -->
    <value>hostname:18088</value>
  </property>
  <property>
    <description>The address of the resource tracker interface.</description>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <!-- Replace hostname with the machine's actual hostname -->
    <value>hostname:8025</value>
  </property>
</configuration>
<configuration>
  <property>
    <name>dfs.namenode.name.dir</name>
    <!-- Create this directory manually beforehand -->
    <value>/home/hadoop/data/hdfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <!-- Create this directory manually beforehand -->
    <value>/home/hadoop/data/hdfs/data</value>
  </property>
  <property>
    <!-- Number of HDFS block replicas -->
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
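All of the local directories referenced in the three files above have to exist before formatting; they can be created in one pass as the hadoop user:

mkdir -p /home/hadoop/data/tmp
mkdir -p /home/hadoop/data/hdfs/name
mkdir -p /home/hadoop/data/hdfs/data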
hadoop namenode -format
Watch the log output: if it contains "Storage directory /home/hadoop/data/hdfs/name has been successfully formatted", the format succeeded.
/home/hadoop/hadoop-2.5.2/sbin/start-all.sh
Running jps as the hadoop user should show one process per daemon:

10682 DataNode
10463 NameNode
11229 ResourceManager
24647 Jps
11040 SecondaryNameNode
11455 NodeManager
The listening ports (netstat -tlnp, java processes only) map back to the daemon PIDs above:

tcp  0  0  0.0.0.0:8042   0.0.0.0:*  LISTEN  11455/java
tcp  0  0  0.0.0.0:50090  0.0.0.0:*  LISTEN  11040/java
tcp  0  0  0.0.0.0:50070  0.0.0.0:*  LISTEN  10463/java
tcp  0  0  0.0.0.0:8088   0.0.0.0:*  LISTEN  11229/java
tcp  0  0  0.0.0.0:34456  0.0.0.0:*  LISTEN  11455/java
tcp  0  0  0.0.0.0:13562  0.0.0.0:*  LISTEN  11455/java
tcp  0  0  0.0.0.0:50010  0.0.0.0:*  LISTEN  10682/java
tcp  0  0  0.0.0.0:50075  0.0.0.0:*  LISTEN  10682/java
tcp  0  0  0.0.0.0:8030   0.0.0.0:*  LISTEN  11229/java
tcp  0  0  0.0.0.0:8031   0.0.0.0:*  LISTEN  11229/java
tcp  0  0  0.0.0.0:8032   0.0.0.0:*  LISTEN  11229/java
tcp  0  0  0.0.0.0:8033   0.0.0.0:*  LISTEN  11229/java
tcp  0  0  0.0.0.0:50020  0.0.0.0:*  LISTEN  10682/java
tcp  0  0  0.0.0.0:8040   0.0.0.0:*  LISTEN  11455/java
NameNode web UI: http://hostname:50070
ResourceManager web UI: http://hostname:8088
echo "My first hadoop example. Hello Hadoop in input. " > /home/hadoop/input
hadoop fs -mkdir -p /user/hadooper
hadoop fs -put /home/hadoop/input /user/hadooper
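Before launching the job, the upload can be verified from HDFS:

hadoop fs -ls /user/hadooper
hadoop fs -cat /user/hadooper/input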
hadoop jar /home/hadoop/hadoop-2.5.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.2.jar wordcount /user/hadooper/input /user/hadooper/output
hadoop@hostname:~/hadoop-2.5.2/share/hadoop/mapreduce$ hadoop jar /home/hadoop/hadoop-2.5.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.2.jar wordcount /user/hadooper/input /user/hadooper/output
14/11/23 19:45:04 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
14/11/23 19:45:05 INFO input.FileInputFormat: Total input paths to process : 1
14/11/23 19:45:05 INFO mapreduce.JobSubmitter: number of splits:1
14/11/23 19:45:06 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1416742510596_0001
14/11/23 19:45:06 INFO impl.YarnClientImpl: Submitted application application_1416742510596_0001
14/11/23 19:45:07 INFO mapreduce.Job: The url to track the job: http://hostname:8088/proxy/application_1416742510596_0001/
14/11/23 19:45:07 INFO mapreduce.Job: Running job: job_1416742510596_0001
14/11/23 19:45:18 INFO mapreduce.Job: Job job_1416742510596_0001 running in uber mode : false
14/11/23 19:45:18 INFO mapreduce.Job: map 0% reduce 0%
14/11/23 19:45:26 INFO mapreduce.Job: map 100% reduce 0%
14/11/23 19:45:36 INFO mapreduce.Job: map 100% reduce 100%
14/11/23 19:45:37 INFO mapreduce.Job: Job job_1416742510596_0001 completed successfully
14/11/23 19:45:37 INFO mapreduce.Job: Counters: 49
        File System Counters
                FILE: Number of bytes read=102
                FILE: Number of bytes written=195793
                FILE: Number of read operations=0
                FILE: Number of large read operations=0
                FILE: Number of write operations=0
                HDFS: Number of bytes read=168
                HDFS: Number of bytes written=64
                HDFS: Number of read operations=6
                HDFS: Number of large read operations=0
                HDFS: Number of write operations=2
        Job Counters
                Launched map tasks=1
                Launched reduce tasks=1
                Data-local map tasks=1
                Total time spent by all maps in occupied slots (ms)=5994
                Total time spent by all reduces in occupied slots (ms)=6925
                Total time spent by all map tasks (ms)=5994
                Total time spent by all reduce tasks (ms)=6925
                Total vcore-seconds taken by all map tasks=5994
                Total vcore-seconds taken by all reduce tasks=6925
                Total megabyte-seconds taken by all map tasks=6137856
                Total megabyte-seconds taken by all reduce tasks=7091200
        Map-Reduce Framework
                Map input records=1
                Map output records=8
                Map output bytes=80
                Map output materialized bytes=102
                Input split bytes=119
                Combine input records=8
                Combine output records=8
                Reduce input groups=8
                Reduce shuffle bytes=102
                Reduce input records=8
                Reduce output records=8
                Spilled Records=16
                Shuffled Maps =1
                Failed Shuffles=0
                Merged Map outputs=1
                GC time elapsed (ms)=101
                CPU time spent (ms)=2640
                Physical memory (bytes) snapshot=422895616
                Virtual memory (bytes) snapshot=2055233536
                Total committed heap usage (bytes)=308281344
        Shuffle Errors
                BAD_ID=0
                CONNECTION=0
                IO_ERROR=0
                WRONG_LENGTH=0
                WRONG_MAP=0
                WRONG_REDUCE=0
        File Input Format Counters
                Bytes Read=49
        File Output Format Counters
                Bytes Written=64
hadoop@hostname:~/hadoop-2.5.2/share/hadoop/mapreduce$ hadoop fs -cat /user/hadooper/output/part-r-00000
Hadoop  1
Hello   1
My      1
example.        1
first   1
hadoop  1
in      1
input.  1
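Listing the output directory is another quick check; alongside part-r-00000, MapReduce writes an empty _SUCCESS marker when the job completes cleanly:

hadoop fs -ls /user/hadooper/output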
hadoop jar /home/hadoop/hadoop-2.5.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.2.jar pi 10 10
The result of this run was 3.200000000000000000.
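For reference, the two arguments to the pi example are the number of map tasks and the number of samples per map, so 10 10 is a very coarse Monte Carlo run and a rough value like 3.20 is expected. More samples give a tighter estimate at the cost of a longer run:

hadoop jar /home/hadoop/hadoop-2.5.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.2.jar pi 16 100000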
1. After Hadoop exits abnormally and is restarted, the NameNode enters Safe Mode and no jobs can be submitted. Fix:
hadoop dfsadmin -safemode leave
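The current state can be checked first; the NameNode normally leaves safe mode on its own once enough DataNode block reports have arrived, so forcing it out is only needed when it stays stuck:

hadoop dfsadmin -safemode get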