MapReduce WordCount 代码初探

一个简单的 WordCount MapReduce 示例程序:输入文本每行一个单词,统计每个单词出现的次数。

package m;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountMapReduce {

    /**
     * Classic word-count MapReduce job.
     *
     * <p>Input format: one word per line. The job counts how many times each
     * distinct word occurs and writes {@code (word, count)} pairs to HDFS.
     */

    /**
     * Configures and submits the MapReduce job, blocking until it finishes.
     *
     * @param args command-line arguments (unused; input/output paths are hard-coded below)
     */
    public static void main(String[] args) {

        // Job configuration container (picks up *-site.xml from the classpath).
        Configuration conf = new Configuration();
        try {
            // Job object carrying all submission parameters.
            Job job = Job.getInstance(conf, "word-count");

            // Jar containing this driver class, so Hadoop can ship it to the cluster.
            job.setJarByClass(WordCountMapReduce.class);

            // Map phase: implementation class and its output key/value types.
            job.setMapperClass(WordMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            // Reduce phase: implementation class and the job's final output key/value types.
            job.setReducerClass(WordReduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            // HDFS input and output paths. NOTE: the output path must not already
            // exist, otherwise the job fails at submission time.
            String[] path = { "hdfs://localhost:9000/word", "hdfs://localhost:9000/output" };

            FileInputFormat.setInputPaths(job, new Path(path[0]));
            FileOutputFormat.setOutputPath(job, new Path(path[1]));

            // Run the job and wait for completion (verbose progress to stdout).
            // Exit non-zero on failure so calling scripts can detect it; the
            // original printed a message but still exited 0.
            if (!job.waitForCompletion(true)) {
                System.err.println("wordcount failed");
                System.exit(1);
            }

        } catch (Exception e) {
            // Surface submission/runtime errors and signal failure to the caller.
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Mapper: emits each input line (expected to contain exactly one word)
     * with a count of 1.
     *
     * <p>Fix: the overridden {@code map} now uses the inherited parameterized
     * {@code Context} instead of the raw {@code Mapper.Context}, restoring
     * compile-time type checking on {@code context.write}.
     */
    private static class WordMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        // Reused output objects — Hadoop idiom to avoid per-record allocation.
        private final Text word = new Text();
        private final IntWritable one = new IntWritable(1);

        /**
         * Invoked once per input line.
         *
         * @param key     byte offset of the line within the input split (unused)
         * @param value   the line's text; treated as a single word
         * @param context sink for the (word, 1) output pair
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // The whole line is the word; emit (word, 1).
            word.set(value.toString());
            context.write(word, one);
        }
    }


    /**
     * Reducer: receives each distinct word together with all of its mapper
     * counts and emits the word with the total.
     *
     * <p>Fix: the original declared raw {@code Iterable values} and raw
     * {@code Reducer.Context}; iterating a raw {@code Iterable} yields
     * {@code Object} and does not compile against {@code IntWritable}.
     * The signature now uses the proper generic types.
     */
    private static class WordReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

        // Reused output value object.
        private final IntWritable total = new IntWritable();

        /**
         * Invoked once per distinct key.
         *
         * @param key     the word
         * @param values  one count per mapper emission of this word
         * @param context sink for the (word, total) output pair
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Sum the per-occurrence counts for this word.
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            total.set(count);
            // The incoming key can be written back directly; no copy needed.
            context.write(key, total);
        }
    }

}

你可能感兴趣的:(mapreduce)