package hadoop.test.csc; import java.io.IOException; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Date; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class MaxTemeratureMapper extends Configured implements Tool { /** * 计数器 * 用于计数各种异常数据 */ enum Counter { LINESKIP, //出错的行 } @Override public int run(String[] args) throws Exception { Configuration conf = getConf(); Job job = new Job(conf, "MaxTemeratureMapper"); // 任务名 job.setJarByClass(MaxTemeratureMapper.class); // 指定Class FileInputFormat.addInputPath(job, new Path(args[0])); // 输入路径 FileOutputFormat.setOutputPath(job, new Path(args[1])); // 输出路径 job.setMapperClass(Map.class); // 调用上面Map类作为Map任务代码 job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(Text.class); // 指定输出的KEY的格式 job.setOutputValueClass(IntWritable.class); // 指定输出的VALUE的格式 job.waitForCompletion(true); // 输出任务完成情况 System.out.println("任务名称:" + job.getJobName()); System.out.println("任务成功:" + (job.isSuccessful() ? "是" : "否")); System.out.println("输入行数:" + job.getCounters() .findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue()); System.out.println("输出行数:" + job.getCounters() .findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_OUTPUT_RECORDS").getValue()); System.out.println("跳过的行:" + job.getCounters().findCounter(Counter.LINESKIP).getValue()); return job.isSuccessful() ? 0 : 1; } /** * MAP任务 */ public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> { public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); // 读取源数据 try { // 数据处理 String year = line.substring(15, 19); System.out.println(year); Text tYear = new Text(year); // int temperature = Integer.parseInt(line.substring(88, 93)); IntWritable iTemperature = new IntWritable(temperature); context.write(tYear, iTemperature); } catch (java.lang.ArrayIndexOutOfBoundsException e) { context.getCounter(Counter.LINESKIP).increment(1); // 出错令计数器+1 return; } } } /** * Reduce任务 */ public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> { public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int maxValue = Integer.MIN_VALUE; IntWritable result = new IntWritable(); for (IntWritable var : values) { maxValue = Math.max(maxValue, var.get()); } result.set(maxValue); context.write(key, result); } } /** * 设置系统说明 * 设置MapReduce任务 */ public static void main(String[] args) throws Exception { //判断参数个数是否正确 //如果无参数运行则显示以作程序说明 if ( args.length != 2 ) { System.err.println(""); System.err.println("Usage: MaxTemeratureMapper< input path > < output path > "); System.err.println("Example: hadoop jar ~/MaxTemeratureMapper.jar hdfs://10.3.19.199:9000/user/hadoop/TempratureData/ hdfs://10.3.19.199:9000/user/hadoop/TempratureData/Temerature_1_output"); System.err.println("Counter:"); System.err.println("\t"+"LINESKIP"+"\t"+"Lines which are too short"); System.exit(-1); } //记录开始时间 DateFormat formatter = new SimpleDateFormat( "yyyy-MM-dd HH:mm:ss" ); Date start = new Date(); //运行任务 int res = ToolRunner.run(new Configuration(), new MaxTemeratureMapper(), args); //输出任务耗时 Date end = new Date(); float time = (float) (( end.getTime() - start.getTime() ) / 60000.0) ; System.out.println( "任务开始:" + formatter.format(start) ); System.out.println( "任务结束:" + formatter.format(end) ); System.out.println( "任务耗时:" + String.valueOf( time ) + " 分钟" ); System.exit(res); } }