// Note: filed under this series for the time being; apologies for the odd placement.
package com.joey.mapred.wordcount; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import java.io.IOException; public class WordCount extends Configured implements Tool { static public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> { final private static LongWritable ONE = new LongWritable(1); private Text tokenValue = new Text(); @Override protected void map(LongWritable offset, Text text, Context context) throws IOException, InterruptedException { for (String token : text.toString().split("\\s+")) { tokenValue.set(token); context.write(tokenValue, ONE); } } } static public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> { private LongWritable total = new LongWritable(); @Override protected void reduce(Text token, Iterable<LongWritable> counts, Context context) throws IOException, InterruptedException { long n = 0; for (LongWritable count : counts) n += count.get(); total.set(n); context.write(token, total); } } public int run(String[] args) throws Exception { Configuration configuration = getConf(); Job job = new Job(configuration, "Word Count"); job.setJarByClass(WordCount.class); job.setMapperClass(WordCountMapper.class); job.setCombinerClass(WordCountReducer.class); job.setReducerClass(WordCountReducer.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); return job.waitForCompletion(true) ? 
0 : -1; } public static void main(String[] args) throws Exception { System.exit(ToolRunner.run(new WordCount(), args)); } }
package com.joey.mapred.wordcount; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mrunit.mapreduce.MapDriver; import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; import org.apache.hadoop.mrunit.mapreduce.ReduceDriver; import org.junit.Before; import org.junit.Test; public class TestWordCount { /*We declare three variables for Mapper Driver , Reducer Driver , MapReduceDrivers Generics parameters for each of them is point worth noting MapDriver generics matches with our test Mapper generics SMSCDRMapper extends Mapper<LongWritable, Text, Text, IntWritable> Similarly for ReduceDriver we have same matching generics declaration with SMSCDRReducer extends Reducer<Text, IntWritable, Text, IntWritable>*/ MapReduceDriver<LongWritable, Text, Text, LongWritable, Text, LongWritable> mapReduceDriver; MapDriver<LongWritable, Text, Text, LongWritable> mapDriver; ReduceDriver<Text, LongWritable, Text, LongWritable> reduceDriver; //create instances of our Mapper , Reducer . 
//Set the corresponding mappers and reducers using setXXX() methods @Before public void setUp() { WordCount.WordCountMapper mapper = new WordCount.WordCountMapper(); WordCount.WordCountReducer reducer = new WordCount.WordCountReducer(); mapDriver = new MapDriver<LongWritable, Text, Text, LongWritable>(); mapDriver.setMapper(mapper); reduceDriver = new ReduceDriver<Text, LongWritable, Text, LongWritable>(); reduceDriver.setReducer(reducer); mapReduceDriver = new MapReduceDriver<LongWritable, Text, Text, LongWritable, Text, LongWritable>(); mapReduceDriver.setMapper(mapper); mapReduceDriver.setReducer(reducer); } @Test public void testMapper() throws IOException { //gave one sample line input to the mapper mapDriver.withInput(new LongWritable(1), new Text("sky sky sky oh my beautiful sky")); //expected output for the mapper mapDriver.withOutput(new Text("sky"), new LongWritable(1)); mapDriver.withOutput(new Text("sky"), new LongWritable(1)); mapDriver.withOutput(new Text("sky"), new LongWritable(1)); mapDriver.withOutput(new Text("oh"), new LongWritable(1)); mapDriver.withOutput(new Text("my"), new LongWritable(1)); mapDriver.withOutput(new Text("beautiful"), new LongWritable(1)); mapDriver.withOutput(new Text("sky"), new LongWritable(1)); //runTest() method run the Mapper test with input mapDriver.runTest(); } @Test public void testReducer() throws IOException { List<LongWritable> values = new ArrayList<LongWritable>(); values.add(new LongWritable(1)); values.add(new LongWritable(1)); reduceDriver.withInput(new Text("sky"), values); reduceDriver.withOutput(new Text("sky"), new LongWritable(2)); reduceDriver.runTest(); } @Test public void testMapReduce() throws IOException { mapReduceDriver.withInput(new LongWritable(1), new Text("sky sky sky")); mapReduceDriver.addOutput(new Text("sky"), new LongWritable(3)); mapReduceDriver.runTest(); } }