Hadoop Beginner's Notes [3]: Using Eclipse to package WordCount into a jar that runs on Hadoop

The Hadoop version is hadoop-1.2.1.

The Eclipse version is eclipse-standard-kepler-SR2-win32-x86_64.

WordCount.java is taken from hadoop-1.2.1\src\examples\org\apache\hadoop\examples\WordCount.java:

/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.examples;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

  public static class TokenizerMapper
       extends Mapper<Object, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(Object key, Text value, Context context
                    ) throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
      }
    }
  }

  public static class IntSumReducer
       extends Reducer<Text,IntWritable,Text,IntWritable> {
    private IntWritable result = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values,
                       Context context
                       ) throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      result.set(sum);
      context.write(key, result);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: wordcount <in> <out>");
      System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
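Before moving this into Eclipse, it may help to see concretely what map and reduce compute. Below is a standalone sketch (plain Java, no Hadoop required; the class name and sample line are made up for illustration) that mimics TokenizerMapper's (word, 1) output and IntSumReducer's per-key summation for a single input line:

import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;

public class WordCountSketch {
    public static void main(String[] args) {
        String line = "hello hadoop hello world";  // sample input line

        // Map phase: TokenizerMapper splits the line and emits (token, 1).
        // Reduce phase: IntSumReducer sums the 1s for each distinct token.
        // A HashMap stands in for the shuffle/sort plus summation here.
        Map<String, Integer> counts = new HashMap<String, Integer>();
        StringTokenizer itr = new StringTokenizer(line);
        while (itr.hasMoreTokens()) {
            String word = itr.nextToken();
            Integer sum = counts.get(word);
            counts.put(word, sum == null ? 1 : sum + 1);
        }

        System.out.println(counts);  // prints {hello=2, world=1, hadoop=1} (order may vary)
    }
}

In the real job, this summation runs once per key in the combiner (locally, on each mapper's output) and again in the reducer across all mappers.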

In Eclipse, create a new Java project named WordCount.

In that project, create a new class named WordCount.

Then overwrite the generated WordCount.java with the code above, and change the package declaration at the top to wordcount. The modified source is:

package wordcount;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

  public static class TokenizerMapper
       extends Mapper<Object, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(Object key, Text value, Context context
                    ) throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
      }
    }
  }

  public static class IntSumReducer
       extends Reducer<Text,IntWritable,Text,IntWritable> {
    private IntWritable result = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values,
                       Context context
                       ) throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      result.set(sum);
      context.write(key, result);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: wordcount <in> <out>");
      System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

 

The source imports several Hadoop classes that are not part of the JDK:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

Because these classes do not ship with the JDK, the jars that contain them must be added to the Eclipse build path; otherwise the compiler has no way to resolve them.

If you compile and run at this point, you will see an error like the following:

Exception in thread "main" java.lang.Error: Unresolved compilation problems: 
	The import org.apache.commons cannot be resolved
	The import org.apache.commons cannot be resolved
	The import org.codehaus cannot be resolved
	The import org.codehaus cannot be resolved
	Log cannot be resolved to a type
	LogFactory cannot be resolved
	Log cannot be resolved to a type
	Log cannot be resolved to a type
	Log cannot be resolved to a type
	Log cannot be resolved to a type
	Log cannot be resolved to a type
	Log cannot be resolved to a type
	Log cannot be resolved to a type
	Log cannot be resolved to a type
	Log cannot be resolved to a type
	Log cannot be resolved to a type
	Log cannot be resolved to a type
	Log cannot be resolved to a type
	Log cannot be resolved to a type
	Log cannot be resolved to a type
	Log cannot be resolved to a type
	Log cannot be resolved to a type
	Log cannot be resolved to a type
	Log cannot be resolved to a type
	JsonFactory cannot be resolved to a type
	JsonFactory cannot be resolved to a type
	JsonGenerator cannot be resolved to a type

	at org.apache.hadoop.conf.Configuration.<init>(Configuration.java:60)
	at wordcount.WordCount.main(WordCount.java:52)

The cause is missing dependency jars; the fix is to add the missing jars to the project's build path.

In the project's Java Build Path, use Add External JARs to add all jar files under hadoop-1.2.1\lib (among them the commons-logging and jackson jars that the error above complains about).
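For reference, the Eclipse build path plays the same role as javac's -classpath option. A rough command-line equivalent is sketched below; hadoop-core-1.2.1.jar sits in the root of the Hadoop distribution, and the exact versions of the lib jars may differ in your copy:

javac -classpath hadoop-core-1.2.1.jar;lib\commons-logging-1.1.1.jar;lib\jackson-core-asl-1.8.8.jar;lib\jackson-mapper-asl-1.8.8.jar -d bin src\wordcount\WordCount.java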

Compile and run again; this time it succeeds.

Finally, package the project as a jar file:

File -> Export, then choose Java -> JAR file.

The jar name does not have to match the class name; WordCount.jar could just as well be CountWord.jar. Click Finish.
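Optionally, on the last page of the JAR export wizard you can select WordCount as the application entry point, which writes a Main-Class entry into the jar's manifest:

Main-Class: wordcount.WordCount

With that entry present, Hadoop's RunJar falls back to the manifest and hadoop jar can be invoked without naming the class. This is optional; the commands below pass the class name explicitly.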

The jar can now be run on Hadoop. For a detailed walkthrough of running WordCount, see Hadoop集群(第6期)_WordCount运行详解.
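Before launching the job, the input directory must exist in HDFS. A minimal sketch, assuming the input is a set of local text files (the local path is illustrative):

hadoop fs -mkdir input
hadoop fs -put /path/to/local/*.txt input

Then run the job: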

hadoop jar WordCount.jar wordcount.WordCount input output

Note that the command must name the main class by its fully qualified name. Because the source above declares

package wordcount;

the class files live under a wordcount/ directory inside the jar, so the main class is wordcount.WordCount. Only a class with no package declaration at all could be invoked with the bare name WordCount. Likewise, if the original package org.apache.hadoop.examples; had been kept, the command would instead be:

hadoop jar WordCount.jar org.apache.hadoop.examples.WordCount input output
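You can confirm the package hierarchy inside the jar with jar tf; for the project above the listing should look roughly like this (nested classes appear as $-suffixed entries):

jar tf WordCount.jar
META-INF/MANIFEST.MF
wordcount/WordCount.class
wordcount/WordCount$TokenizerMapper.class
wordcount/WordCount$IntSumReducer.class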

The packaging steps here are also covered in [hadoop]命令行编译并运行hadoop例子WordCount.



Reposted from: 林羽飞扬
