MapReduce之WordCount

用于统计文件中每个单词出现的个数

注意各个文件的导包,job的封装步骤

WordCountMapper.java

package top.wintp.mapreduce.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper for the word-count job: splits each input line on single spaces
 * and emits a ({@code word}, 1) pair for every token.
 *
 * <p>Input key/value: byte offset ({@link LongWritable}) / line ({@link Text}).
 * Output key/value: word ({@link Text}) / count ({@link IntWritable}).
 *
 * @author upuptop (blog: http://wintp.top)
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Output objects are reused across records to avoid per-record allocation,
    // which is the standard Hadoop Writable idiom.
    private final Text outKey = new Text();
    private final IntWritable one = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        // NOTE(review): split(" ") produces empty tokens for consecutive
        // spaces; use split("\\s+") if the input may contain tabs or runs
        // of whitespace.
        String[] words = line.split(" ");
        for (String word : words) {
            outKey.set(word);
            context.write(outKey, one);
        }
    }
}

WordCountReduce

package top.wintp.mapreduce.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer for the word-count job: sums the per-word 1-counts emitted by
 * {@code WordCountMapper} and writes the total for each word.
 *
 * <p>Input key/value: word ({@link Text}) / counts ({@link IntWritable}).
 * Output key/value: word ({@link Text}) / total ({@link IntWritable}).
 *
 * @author upuptop (blog: http://wintp.top)
 */
public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    // Reused output value object (standard Hadoop Writable idiom).
    private final IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}

WordCountRunner

package top.wintp.mapreduce.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * @description: description:
 * 

* @author: upuptop *

* @qq: 337081267 *

* @CSDN: http://blog.csdn.net/pyfysf *

* @cnblogs: http://www.cnblogs.com/upuptop *

* @blog: http://wintp.top *

* @email: [email protected] *

* @time: 2019/05/2019/5/21 *

*/ public class WordCountRunner implements Tool { private Configuration conf; public int run(String[] strings) throws Exception { //封装job Job job = Job.getInstance(this.conf); job.setJarByClass(WordCountRunner.class); job.setMapperClass(WordCountMapper.class); job.setReducerClass(WordCountReduce.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.setInputPaths(job, new Path("E:/input/wordcount/")); FileOutputFormat.setOutputPath(job, new Path("E:/output/wordcount/" + System.currentTimeMillis())); //提交任务 int result = job.waitForCompletion(true) ? 0 : 1; return result; } public void setConf(Configuration configuration) { this.conf = configuration; } public Configuration getConf() { return this.conf; } public static void main(String[] args) throws Exception { int status = ToolRunner.run(new WordCountRunner(), args); System.exit(status); } }

log4j.properties

log4j.rootLogger=INFO, stdout  
log4j.appender.stdout=org.apache.log4j.ConsoleAppender  
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout  
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n  
log4j.appender.logfile=org.apache.log4j.FileAppender  
log4j.appender.logfile.File=target/spring.log  
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout  
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n  

你可能感兴趣的:(MapReduce之WordCount)