Map phase:
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MyMapper extends Mapper<LongWritable, Text, LongWritable, IntWritable> {

    /**
     * An enum whose constants serve as the custom counters.
     */
    public static enum FileRecorder {
        ErrorRecorder,
        TotalRecorder
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        if ("error".equals(value.toString())) {
            // Increment the error-record counter.
            context.getCounter(FileRecorder.ErrorRecorder).increment(1);
        }
        // Increment the total-record counter for every input line.
        context.getCounter(FileRecorder.TotalRecorder).increment(1);
    }
}
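Besides enum-based counters, the MapReduce API also supports dynamic counters addressed by group and counter name strings, which is handy when the set of counters is not known at compile time. As a minimal sketch (the group/counter names here are illustrative, not part of the original code), the body of map above could equivalently be written as:

    // Dynamic counters: no enum needed, the group/name strings identify the counter.
    if ("error".equals(value.toString())) {
        context.getCounter("FileRecorder", "ErrorRecorder").increment(1);
    }
    context.getCounter("FileRecorder", "TotalRecorder").increment(1);

On the client side such counters are retrieved the same way, via job.getCounters().findCounter("FileRecorder", "ErrorRecorder").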
Driver (launch) function:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.seven.mapreduce.counter.MyMapper.FileRecorder;

public class JobMain {

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        // Use NLineInputFormat to split a small file into 5-line splits,
        // thereby simulating the distributed processing of a large file.
        configuration.setInt("mapreduce.input.lineinputformat.linespermap", 5);

        Job job = Job.getInstance(configuration, "counter-job");
        job.setInputFormatClass(NLineInputFormat.class);
        job.setJarByClass(JobMain.class);
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        Path outputDir = new Path(args[1]);
        // Delete the output directory if it already exists, so the job can be rerun.
        FileSystem fs = FileSystem.get(configuration);
        if (fs.exists(outputDir)) {
            fs.delete(outputDir, true);
        }
        FileOutputFormat.setOutputPath(job, outputDir);

        if (job.waitForCompletion(true)) {
            System.out.println("Error num:" +
                    job.getCounters().findCounter(FileRecorder.ErrorRecorder).getValue());
            System.out.println("Total num:" +
                    job.getCounters().findCounter(FileRecorder.TotalRecorder).getValue());
        }
    }
}
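The same getCounters() call also exposes the framework's built-in counters, not just the custom ones. A small sketch, assuming the Hadoop 2.x mapreduce API where the built-in task counters live in org.apache.hadoop.mapreduce.TaskCounter:

    import org.apache.hadoop.mapreduce.TaskCounter;

    // After waitForCompletion(true) returns, built-in counters can be read
    // exactly like the custom FileRecorder counters.
    long inputRecords = job.getCounters()
            .findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
    System.out.println("Map input records: " + inputRecords);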
Run result:

From the output above we can see that 5 map tasks ran in total, and that the Counter acted as a global accumulator across the different JVMs. For the meaning of the built-in counters beyond the custom ones, see 《MapReduce-Counters含义》.
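To inspect every counter a finished job produced, including those built-in groups, the Counters object can be iterated by group. A minimal sketch, again assuming the Hadoop 2.x mapreduce API:

    import org.apache.hadoop.mapreduce.Counter;
    import org.apache.hadoop.mapreduce.CounterGroup;

    // Print every counter group and every counter of the finished job.
    for (CounterGroup group : job.getCounters()) {
        for (Counter counter : group) {
            System.out.println(group.getDisplayName() + "\t"
                    + counter.getDisplayName() + " = " + counter.getValue());
        }
    }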