创建项目文件夹
# Create the source tree as the current user: anything created with sudo is
# owned by root, and the later non-sudo steps (mkdir, echo >) could not write
# under ~/hpro.
mkdir -p ~/hpro/com/vs/example
创建主程序类
gedit ~/hpro/com/vs/example/WordCount.java
java类如下
package com.vs.example;

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

/**
 * MapReduce job that counts how many times each whitespace-separated token
 * occurs in the input text, written against the {@code org.apache.hadoop.mapred} API.
 *
 * <p>Usage: {@code WordCount <input path> <output path>}
 */
public class WordCount {

    /**
     * Mapper: splits every input line into tokens and emits {@code (token, 1)}
     * for each of them.
     */
    public static class Map extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);

        // Reused for every token so that no new Text is allocated per word.
        private Text word = new Text();

        /**
         * Emits {@code (token, 1)} for each token of one input line.
         *
         * @param key      input key supplied by the input format (not used here)
         * @param value    the line of text to tokenize
         * @param output   collector receiving the {@code (token, 1)} pairs
         * @param reporter progress reporter (not used here)
         * @throws IOException if the collector fails to accept a pair
         */
        @Override
        public void map(LongWritable key, Text value,
                        OutputCollector<Text, IntWritable> output,
                        Reporter reporter) throws IOException {
            String line = value.toString();
            // StringTokenizer with no explicit delimiters splits on whitespace.
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, one);
            }
        }
    }

    /**
     * Reducer: adds up all counts received for a key and emits
     * {@code (key, total)}.
     */
    public static class Reduce extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {

        /**
         * Sums the values of one key.
         *
         * @param key      the token being counted
         * @param values   the counts collected for that token
         * @param output   collector receiving the single {@code (key, sum)} pair
         * @param reporter progress reporter (not used here)
         * @throws IOException if the collector fails to accept the pair
         */
        @Override
        public void reduce(Text key, Iterator<IntWritable> values,
                           OutputCollector<Text, IntWritable> output,
                           Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    /**
     * Configures the word-count job and runs it.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output
     *             path; any further arguments are ignored
     * @throws Exception if the job cannot be configured or run
     */
    public static void main(String[] args) throws Exception {
        // Fail with a readable message instead of ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: WordCount <input path> <output path>");
            System.exit(2);
        }

        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(Map.class);
        // Summation is associative and commutative, so the reducer can also act
        // as a combiner to pre-aggregate map output; final counts are unchanged.
        conf.setCombinerClass(Reduce.class);
        conf.setReducerClass(Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}
创建类文件的目录
# Directory that receives the compiled .class files (-p: no error if it already exists).
mkdir -p ~/hpro/FirstJar
# Compile against the Hadoop core jar. The source path is anchored at ~ like
# every other path here, so the command works from any working directory.
javac -classpath ~/hadoop-1.0.4/hadoop-core-1.0.4.jar -d ~/hpro/FirstJar ~/hpro/com/vs/example/WordCount.java
# Package the compiled classes into wordcount.jar.
"$JAVA_HOME"/bin/jar -cvf ~/hpro/wordcount.jar -C ~/hpro/FirstJar/ .
# Two small sample input files.
echo "Hello World Bye World" > ~/hpro/file01
echo "Hello Hadoop GoodBye Hadoop" > ~/hpro/file02
# Upload the samples into an HDFS directory named "input".
~/hadoop-1.0.4/bin/hadoop dfs -mkdir input
~/hadoop-1.0.4/bin/hadoop dfs -put ~/hpro/file0* input
运行程序
# Run the job: main class com.vs.example.WordCount, HDFS input dir "input",
# HDFS output dir "output".
~/hadoop-1.0.4/bin/hadoop jar ~/hpro/wordcount.jar com.vs.example.WordCount input output
# JobTracker web page; open it in a browser to follow the job.
http://localhost:50030/jobtracker.jsp
# Copy the HDFS "output" directory into ~/hpro, then enter the local copy.
# The cd target is anchored at ~ so it works from any working directory.
~/hadoop-1.0.4/bin/hadoop dfs -get output ~/hpro/
cd ~/hpro/output
gedit part-00000
文件part-00000的内容如下
Bye	1
GoodBye	1
Hadoop	2
Hello	2
World	2