Hadoop Error: ClassNotFoundException

Development environment:

Hadoop 1.2.1

Eclipse Juno Service Release 2

JDK 1.7

Ubuntu 12.04 LTS


Code:

The code below is essentially a WordCount program.


package org.conan.myhadoop.mr;

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

    public static class WordCountMapper extends Mapper<Object, Text, Text, IntWritable> {
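        // Emits (word, 1) for every whitespace-separated token of each input line.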
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
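        // Also used as the combiner: sums the counts collected for each word.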
        private IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
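        // Input and output are hardcoded HDFS paths; adjust them to your cluster.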
        String input = "hdfs://192.168.56.101:9000/user/hdfs/o_t_account";
        String output = "hdfs://192.168.56.101:9000/user/hdfs/o_t_account/result";

        Configuration conf = new Configuration();
        
        //conf.set("mapred.jar", "C:/Users/exinglo/Documents/DigDig/Hadoop/myHadoop/myHadoop.jar");
        //conf.set("hadoop.job.user", "hadoop");
        //conf.addResource("classpath:/hadoop/core-site.xml");
        //conf.addResource("classpath:/hadoop/hdfs-site.xml");
        //conf.addResource("classpath:/hadoop/mapred-site.xml");

        //conf.set("fs.default.name", "hdfs://192.168.56.101:9000");
        //conf.set("mapred.job.tracker", "192.168.56.101:9001");
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordCountMapper.class);
        job.setCombinerClass(WordCountReducer.class);
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}
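
For reference, once the program is exported as a jar it can be launched directly on the Ubuntu machine, which works without any extra configuration (see the Solution section below). Assuming the jar is named myHadoop.jar, as in the Solution section:

hadoop jar myHadoop.jar org.conan.myhadoop.mr.WordCount

Since main() ignores its arguments (the paths are hardcoded), no further arguments are needed.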

Error output:

Mar 16, 2014 4:00:40 PM org.apache.hadoop.mapred.JobClient copyAndConfigureFiles
WARNING: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
Mar 16, 2014 4:00:40 PM org.apache.hadoop.mapred.JobClient copyAndConfigureFiles
WARNING: No job jar file set.  User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
Mar 16, 2014 4:00:40 PM org.apache.hadoop.mapreduce.lib.input.FileInputFormat listStatus
INFO: Total input paths to process : 1
Mar 16, 2014 4:00:40 PM org.apache.hadoop.util.NativeCodeLoader <clinit>
WARNING: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Mar 16, 2014 4:00:40 PM org.apache.hadoop.io.compress.snappy.LoadSnappy <clinit>
WARNING: Snappy native library not loaded
Mar 16, 2014 4:00:40 PM org.apache.hadoop.mapred.JobClient monitorAndPrintJob
INFO: Running job: job_201403151655_0012
Mar 16, 2014 4:00:41 PM org.apache.hadoop.mapred.JobClient monitorAndPrintJob
INFO:  map 0% reduce 0%
Mar 16, 2014 4:00:57 PM org.apache.hadoop.mapred.JobClient monitorAndPrintJob
INFO: Task Id : attempt_201403151655_0012_m_000000_0, Status : FAILED
java.lang.RuntimeException: java.lang.ClassNotFoundException: org.conan.myhadoop.mr.WordCount$WordCountMapper
	at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:857)
	at org.apache.hadoop.mapreduce.JobContext.getMapperClass(JobContext.java:199)
	at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:718)
	at org.apache.hadoop.mapred.MapTask.run(MapTask.java:364)
	at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:415)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
	at org.apache.hadoop.mapred.Child.main(Child.java:249)

Solution:

I packaged this WordCount program as a jar and it runs fine on Ubuntu, but to keep running the program from Windows the following steps are needed:
  1. Many posts online say the fix is to call job.setJarByClass(WordCount.class); but the WordCount source shipped with hadoop-1.1.2 already contains job.setJarByClass(WordCount.class). What actually works is to find where conf is created and add conf.set("mapred.jar", "C:/Users/admin/Documents/DigDig/Hadoop/myHadoop/myHadoop.jar"); as shown in the sketch after this list. Note: the key "mapred.jar" must not be changed; the second argument is the path of your exported jar.
  2. Export the project as xx.jar, i.e. the jar referenced by "mapred.jar" above (right-click the project -> select Export -> select the packages to output). I put it under "C:/Users/exinglo/Documents/DigDig/Hadoop/myHadoop/"; in general it is best to keep it inside the project, under the default path.
After that the job runs normally. It is still somewhat cumbersome: the jar has to be rebuilt and conf.set() configured each time. But at least I no longer need to switch environments, which is a little more convenient than uploading to Ubuntu for every run; for a beginner like me this is already a good start. I am sure something is still not configured properly somewhere and this can be improved; I will keep investigating. If anyone knows a better way, please leave a comment!
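
Putting the two steps together, the only change to main() is one extra line before the Job is constructed. A minimal sketch (the jar path is the export location from step 2 on my machine; adjust it to wherever your jar lives):

        Configuration conf = new Configuration();
        // "mapred.jar" tells the JobClient which jar to ship with the job, so
        // that the TaskTrackers can load WordCount$WordCountMapper and
        // WordCount$WordCountReducer at runtime. The key is fixed; the value
        // is the path of the jar exported in step 2.
        conf.set("mapred.jar", "C:/Users/exinglo/Documents/DigDig/Hadoop/myHadoop/myHadoop.jar");

        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class); // keep this call as well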

The approach here is based on http://www.th7.cn/Program/java/201312/166507.shtml


Cause analysis:

http://blog.csdn.net/zklth/article/details/5816435

The article above mentions some possible causes, but commenters there question its account of the JobTracker's role, so I will leave it at that for now and study the internals in detail later. The short version, as far as I understand it: when the job is submitted from Eclipse without a job jar (note the "No job jar file set" warning in the log above), the user classes exist only on the client's classpath and are never shipped to the TaskTracker nodes, so the map task cannot load WordCount$WordCountMapper.
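
As a side note, the first WARNING in the log ("Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.") is unrelated to the ClassNotFoundException, but it is easy to address. Below is a minimal sketch of the Tool pattern reusing the mapper and reducer above; the class name WordCountDriver is my own placeholder, and the input/output paths now come from the command line instead of being hardcoded:

package org.conan.myhadoop.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCountDriver extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        // getConf() returns the Configuration that ToolRunner prepared.
        Job job = new Job(getConf(), "word count");
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(WordCount.WordCountMapper.class);
        job.setCombinerClass(WordCount.WordCountReducer.class);
        job.setReducerClass(WordCount.WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // ToolRunner invokes GenericOptionsParser internally, so generic
        // options such as -D key=value are handled before run() is called.
        System.exit(ToolRunner.run(new Configuration(), new WordCountDriver(), args));
    }
}

With this structure, the WARNING disappears and cluster settings can be passed on the command line instead of being baked into the code.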
