大数据系列hadoop——MapReduce实例——单词计数

目录

一、简介

二、思路

三、实现


一、简介

             这应该是 MapReduce 里最简单的例子,也是几乎每个人入门时写的第一个程序。

二、思路

             map 阶段:对每一行文本分词,把每个单词作为 key,value 固定为 1,即输出 <单词, 1>。

             reduce 阶段:把同一个单词(key)对应的所有 value 相加,得到该单词出现的总次数。

三、实现

             

/**
 * Driver for the word-count MapReduce job.
 *
 * <p>Wires {@code WordCountMapper} and {@code WordCountReducer} into a single-reducer job,
 * reading from the input path in {@code args[1]} and writing to the output path in
 * {@code args[2]}.
 */
@Slf4j
public class WordCountJob {

	/** Number of leading arguments this class reads directly (indices 1 and 2 must exist). */
	private static final int MIN_ARGS = 3;

	/**
	 * Configures the job, runs it to completion and terminates the JVM with exit code
	 * {@code 0} on success or {@code 1} on failure.
	 *
	 * @param args 0|profile;1|input;2|output;3|master-ip;4|operator;5|homeDir
	 * @throws IllegalArgumentException if fewer than three arguments are supplied
	 * @throws Exception if configuration, file-system access or job execution fails
	 */
	public static void main(String[] args) throws Exception {

		// Fail fast with a readable message instead of an ArrayIndexOutOfBoundsException
		// raised later at args[1] / args[2].
		if (args == null || args.length < MIN_ARGS) {
			throw new IllegalArgumentException(
					"expected arguments: profile input output [master-ip operator homeDir], got "
							+ (args == null ? 0 : args.length));
		}

		Configuration config = JobUtil.init(args);

		Job job = Job.getInstance(config);

		job.setJarByClass(WordCountJob.class);
		job.setJobName("wordCount");

		Path inputPath = new Path(args[1]);
		FileInputFormat.addInputPath(job, inputPath);

		// Remove a pre-existing output directory (recursively) so the job can be re-run;
		// Hadoop's FileOutputFormat refuses to write into an existing output path.
		Path outputPath = new Path(args[2]);
		if (outputPath.getFileSystem(config).exists(outputPath)) {
			outputPath.getFileSystem(config).delete(outputPath, true);
		}
		FileOutputFormat.setOutputPath(job, outputPath);

		job.setMapperClass(WordCountMapper.class);
		// No separate map-output classes are set, so these types describe both the
		// map output and the final (reduce) output.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		job.setNumReduceTasks(1);
		job.setReducerClass(WordCountReducer.class);

		boolean isSuccess = job.waitForCompletion(true);
		log.info("isSuccess:" + isSuccess);
		System.exit(isSuccess ? 0 : 1);

	}

}
/**
 * Map stage of the word-count job: splits each input value into whitespace-delimited
 * tokens and emits {@code (token, 1)} for every token.
 *
 * <p>The type arguments are required: with a raw {@code Mapper} supertype the
 * {@code map(Object, Text, Context)} method below would not override the framework's
 * {@code map} method and the {@code @Override} annotation would fail to compile.
 */
@Slf4j
public class WordCountMapper extends Mapper<Object, Text, Text, IntWritable> {

	/** Constant count emitted for every token. */
	private static final IntWritable ONE = new IntWritable(1);

	/** Reused output key; its content is overwritten for each token before writing. */
	private final Text word = new Text();

	/**
	 * Tokenizes one input record and writes {@code (token, 1)} per token.
	 *
	 * @param key     input key of the record; only logged, not otherwise used
	 * @param value   the text of the record to tokenize
	 * @param context sink for the emitted {@code (word, 1)} pairs
	 * @throws IOException          if writing to the context fails
	 * @throws InterruptedException if the task is interrupted while writing
	 */
	@Override
	public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
		// NOTE(review): info-level logging per record and per token is very verbose on
		// real inputs; presumably kept for demonstration purposes.
		log.info("key:" + key + ",value:" + value);
		// Default StringTokenizer delimiters: space, tab, newline, carriage return, form feed.
		StringTokenizer tokens = new StringTokenizer(value.toString());
		while (tokens.hasMoreTokens()) {
			String wordStr = tokens.nextToken();
			log.info("word:" + wordStr);
			word.set(wordStr);
			context.write(word, ONE);
		}
	}

}
/**
 * Reduce stage of the word-count job: sums all counts received for a word and emits
 * {@code (word, total)}.
 *
 * <p>The type arguments are required: with a raw {@code Reducer} supertype and a raw
 * {@code Iterable} parameter, iterating the values as {@code IntWritable} does not
 * compile and {@code reduce} does not override the framework method.
 */
@Slf4j
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

	/** Reused output value; overwritten with the sum for each key before writing. */
	private final IntWritable result = new IntWritable();

	/**
	 * Adds up every value for {@code key} and writes the total.
	 *
	 * @param key     the word being counted
	 * @param values  the counts emitted for this word
	 * @param context sink for the emitted {@code (word, total)} pair
	 * @throws IOException          if writing to the context fails
	 * @throws InterruptedException if the task is interrupted while writing
	 */
	@Override
	public void reduce(Text key, Iterable<IntWritable> values, Context context)
			throws IOException, InterruptedException {
		log.info("key:" + key);
		int sum = 0;
		for (IntWritable value : values) {
			log.info("value:" + value);
			sum += value.get();
		}
		result.set(sum);
		context.write(key, result);
	}

}

完整代码见:https://github.com/wulinfeng2/hadoopDemo

你可能感兴趣的:(大数据,hadoop)