Development environment:
Hadoop 1.2.1
Eclipse Juno Service Release 2
JDK 1.7
Ubuntu 12.04 LTS
Code:
The code below is essentially the standard WordCount program.
package org.conan.myhadoop.mr;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

    // Mapper: tokenize each input line and emit (word, 1) for every token.
    public static class WordCountMapper extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    // Reducer (also used as the combiner): sum the counts for each word.
    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        String input = "hdfs://192.168.56.101:9000/user/hdfs/o_t_account";
        String output = "hdfs://192.168.56.101:9000/user/hdfs/o_t_account/result";

        Configuration conf = new Configuration();
        //conf.set("mapred.jar", "C:/Users/exinglo/Documents/DigDig/Hadoop/myHadoop/myHadoop.jar");
        //conf.set("hadoop.job.user", "hadoop");
        //conf.addResource("classpath:/hadoop/core-site.xml");
        //conf.addResource("classpath:/hadoop/hdfs-site.xml");
        //conf.addResource("classpath:/hadoop/mapred-site.xml");
        //conf.set("fs.default.name", "hdfs://192.168.56.101:9000");
        //conf.set("mapred.job.tracker", "192.168.56.101:9001");

        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordCountMapper.class);
        job.setCombinerClass(WordCountReducer.class);
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
Running it directly from Eclipse on Windows fails; the console output is as follows:
Mar 16, 2014 4:00:40 PM org.apache.hadoop.mapred.JobClient copyAndConfigureFiles
WARNING: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
Mar 16, 2014 4:00:40 PM org.apache.hadoop.mapred.JobClient copyAndConfigureFiles
WARNING: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
Mar 16, 2014 4:00:40 PM org.apache.hadoop.mapreduce.lib.input.FileInputFormat listStatus
INFO: Total input paths to process : 1
Mar 16, 2014 4:00:40 PM org.apache.hadoop.util.NativeCodeLoader <clinit>
WARNING: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Mar 16, 2014 4:00:40 PM org.apache.hadoop.io.compress.snappy.LoadSnappy <clinit>
WARNING: Snappy native library not loaded
Mar 16, 2014 4:00:40 PM org.apache.hadoop.mapred.JobClient monitorAndPrintJob
INFO: Running job: job_201403151655_0012
Mar 16, 2014 4:00:41 PM org.apache.hadoop.mapred.JobClient monitorAndPrintJob
INFO: map 0% reduce 0%
Mar 16, 2014 4:00:57 PM org.apache.hadoop.mapred.JobClient monitorAndPrintJob
INFO: Task Id : attempt_201403151655_0012_m_000000_0, Status : FAILED
java.lang.RuntimeException: java.lang.ClassNotFoundException: org.conan.myhadoop.mr.WordCount$WordCountMapper
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:857)
at org.apache.hadoop.mapreduce.JobContext.getMapperClass(JobContext.java:199)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:718)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:364)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
If I package this WordCount program into a jar, it runs fine on Ubuntu; but to keep running it directly from Windows, the following steps are needed:
1. Many posts online say you have to call job.setJarByClass(WordCount.class); but the WordCount source shipped with hadoop-1.1.2 already contains job.setJarByClass(WordCount.class);. Instead, find where conf is created and add conf.set("mapred.jar", "C:/Users/admin/Documents/DigDig/Hadoop/myHadoop/myHadoop.jar"); (see the sketch after this list). Note: the property name "mapred.jar" must not be changed; the second argument is the path of the jar you export.
2. Export the project as a jar (this is the jar referenced by the path above): right-click the project, choose Export, and select the packages to include. I put mine under "C:/Users/exinglo/Documents/DigDig/Hadoop/myHadoop/"; in general it is easiest to keep it inside the project, in the default location.
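To make step 1 concrete, here is a minimal sketch of how the start of main() in the listing above looks after the change. Only the Configuration setup changes; the mapper, reducer, and the rest of the job setup stay exactly as they are, and the path is simply wherever the jar from step 2 was exported (it will differ on your machine):

// Sketch of step 1: only the beginning of main() in WordCount changes.
Configuration conf = new Configuration();
// "mapred.jar" is the property Hadoop 1.x reads to locate the job jar; keep the name as-is.
// The value is the path of the jar exported in step 2; adjust it to your own machine.
conf.set("mapred.jar", "C:/Users/exinglo/Documents/DigDig/Hadoop/myHadoop/myHadoop.jar");
Job job = new Job(conf, "word count");
job.setJarByClass(WordCount.class);
// ... remaining job setup is identical to the listing above ...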
After that, the job runs normally. It is still a bit cumbersome: you have to export the jar and call conf.set() every time, but at least I no longer have to switch environments, which is a little more convenient than uploading to Ubuntu for every run. For a beginner like me this is a good start. I suspect something in my setup is still not configured correctly and this can be improved further; I will keep digging, and I would welcome advice from anyone who knows better.
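One possible refinement is already hinted at by the WARNING in the log above ("Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same."). A driver that implements Tool can take the input/output paths and properties such as mapred.jar from the command line (or from the Eclipse run configuration) instead of hard-coding them and recompiling each time. The sketch below is only my guess at what that would look like for this job; the class name WordCountDriver and the argument handling are mine, not part of the original program:

package org.conan.myhadoop.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// Hypothetical Tool-based driver; it reuses the Mapper/Reducer from WordCount above.
public class WordCountDriver extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: WordCountDriver <input> <output>");
            return 2;
        }
        // getConf() already contains anything passed via -D on the command line,
        // e.g. -Dmapred.jar=C:/path/to/myHadoop.jar, so nothing needs to be hard-coded here.
        Job job = new Job(getConf(), "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordCount.WordCountMapper.class);
        job.setCombinerClass(WordCount.WordCountReducer.class);
        job.setReducerClass(WordCount.WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // ToolRunner uses GenericOptionsParser internally to strip -D/-fs/-jt options
        // before run() sees the remaining arguments.
        int exitCode = ToolRunner.run(new Configuration(), new WordCountDriver(), args);
        System.exit(exitCode);
    }
}

With this, the Eclipse run configuration's program arguments could be something like: -Dmapred.jar=C:/Users/exinglo/Documents/DigDig/Hadoop/myHadoop/myHadoop.jar hdfs://192.168.56.101:9000/user/hdfs/o_t_account hdfs://192.168.56.101:9000/user/hdfs/o_t_account/result. Again, this is a sketch I have not verified against this exact setup.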
The approach here is based on http://www.th7.cn/Program/java/201312/166507.shtml
Cause analysis:
http://blog.csdn.net/zklth/article/details/5816435
The article above suggests some possible causes, but commenters there question its account of the JobTracker's role, so I am leaving it here for now and will study the internals in detail later.