Mapreduce编写wordcount程序

Map组件编写

package wc;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

//                                      泛型                 k1       v1    k2       v2
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable>{

    @Override
    protected void map(LongWritable key1, Text value1, Context context)
            throws IOException, InterruptedException {
        /*
         * context 表示Mapper的上下文
         * 上文:HDFS
         * 下文:Mapper
         */
        //数据: I love Beijing
        String data = value1.toString();

        //分词
        String[] words = data.split(" ");

        //输出 k2    v2
        for(String w:words){
            context.write(new Text(w), new IntWritable(1));
        }
    }
}

Reducer组件编写

package wc;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

//                                             k3      v3         k4       v4
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text k3, Iterable v3,Context context) throws IOException, InterruptedException {
        /*
         * context是reduce的上下文
         * 上文
         * 下文 
         */
        //对v3求和
        int total = 0;
        for(IntWritable v:v3){
            total += v.get();
        }

        //输出   k4 单词   v4  频率
        context.write(k3, new IntWritable(total));
    }

}

Main方法

package wc;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountMain {

    public static void main(String[] args) throws Exception {
        // 创建一个job和任务入口
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(WordCountMain.class);  //main方法所在的class

        //指定job的mapper和输出的类型
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);    //k2的类型
        job.setMapOutputValueClass(IntWritable.class);  //v2的类型

        //指定job的reducer和输出的类型
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);  //k4的类型
        job.setOutputValueClass(IntWritable.class);  //v4的类型

        //指定job的输入和输出
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        //执行job
        job.waitForCompletion(true);
    }
}

你可能感兴趣的:(大数据笔记)