MapReduce单词计数的例子

WordCount类

package org.apache.hadoop.examples; // declares the package this source file belongs to

import java.io.IOException;
import java.util.StringTokenizer; // splits a string into whitespace-separated tokens

import org.apache.hadoop.conf.Configuration; // job configuration
import org.apache.hadoop.fs.Path; // file system paths
import org.apache.hadoop.io.IntWritable; // Writable wrapper around an int
import org.apache.hadoop.io.Text; // Writable wrapper around text (not related to IntWritable)
import org.apache.hadoop.mapreduce.Job; // describes and submits a MapReduce job
import org.apache.hadoop.mapreduce.Mapper; // base class extended by the mapper
import org.apache.hadoop.mapreduce.Reducer; // base class extended by the reducer
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // file-based job input
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; // file-based job output
import org.apache.hadoop.util.GenericOptionsParser; // parses generic Hadoop command-line options

public class WordCount{
    public static class TokenizerMapper //Map
        extends Mapper<Object, Text, Text, IntWritable>
        {
            private final static IntWritable one = new IntWritable(1); //实例化IntWriteable类
            private Text word = new Text();//实例化
            public void map(Object key, Text value, Context context)
                throws IOException, InterruptedExeption{
                    StringTokenizer itr = new StringTokenizer(value.toString)//将value字符串分开
                    while(itr.hasMoreTokens()){ //有单词就循环
                        word.set(itr.nextToken()); //指向下一个单词
                        context.write(word, one);//遇到一个单词就写成(word,1)
                    }
                }
        }

    public static class IntSumReducer  //Reduce
        extends Reducer<Object, Text, Text, IntWritable>
        {
            private IntWritable result = new IntWritable();
            public void reduce (Text key, Iterable values, context context)//context 记录输入的key和value
                throws IOException, InterruptedException{
                    int sum = 0;
                    for (IntWritable val: values){ //遍历values
                        sum += val.get();
                    }
                    result.set(sum);
                    context.write(key, result);
                }
        }
}

public static void main (String [] args) throws Exception{
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if(otherArgs.length!=2){
        System.err.println("Usage arg err!");
        System(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper);
    job.setCombinerClass(IntSumReducer);
    job.setReducerClass(IntSumReducer);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);//定义输出value类型
    FileInputFormat.addInputPath(job, new Path(otherArgs[0));
    FileOutPutFormat.addOuputtPath(job, new path(otherArgs[1])
    System.exit(job.waitForCompletion(true?1:0);
}

你可能感兴趣的:(Mapreduce单词计数的例子)