// Weather data analysis code (气象数据分析代码)

package hadoop.test.csc;

import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * MapReduce job that emits, for every year found in the input records, the
 * maximum temperature value seen for that year.
 *
 * <p>The job is driven through {@link ToolRunner}; it expects exactly two
 * arguments: the input path and the output path.
 */
public class MaxTemeratureMapper extends Configured implements Tool {

  /** Counters used to tally abnormal input data. */
  enum Counter {
    /** Lines that were skipped because they could not be processed. */
    LINESKIP,
  }

  /** Counter group under which the per-task record counters are looked up. */
  private static final String TASK_COUNTER_GROUP = "org.apache.hadoop.mapred.Task$Counter";

  /**
   * Configures and runs the job, then prints a short summary of its counters.
   *
   * @param args {@code args[0]} is the input path, {@code args[1]} the output path
   * @return 0 if the job succeeded, 1 otherwise
   * @throws Exception if job setup or execution fails
   */
  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "MaxTemeratureMapper"); // job name
    job.setJarByClass(MaxTemeratureMapper.class);
    FileInputFormat.addInputPath(job, new Path(args[0])); // input path
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path

    job.setMapperClass(Map.class);
    // Taking a maximum is associative and commutative, so the reducer can
    // double as the combiner.
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class); // type of the output key
    job.setOutputValueClass(IntWritable.class); // type of the output value

    // waitForCompletion reports whether the job succeeded; keep the result
    // instead of querying the job a second time.
    boolean succeeded = job.waitForCompletion(true);

    // Print a summary of the finished job.
    System.out.println("任务名称:" + job.getJobName());
    System.out.println("任务成功:" + (succeeded ? "是" : "否"));
    System.out.println("输入行数:"
        + job.getCounters()
            .findCounter(TASK_COUNTER_GROUP, "MAP_INPUT_RECORDS").getValue());
    System.out.println("输出行数:"
        + job.getCounters()
            .findCounter(TASK_COUNTER_GROUP, "MAP_OUTPUT_RECORDS").getValue());
    System.out.println("跳过的行:"
        + job.getCounters().findCounter(Counter.LINESKIP).getValue());
    return succeeded ? 0 : 1;
  }

  /**
   * Map task: extracts a (year, temperature) pair from each fixed-width input
   * line and emits it. Lines that cannot be processed are counted under
   * {@link Counter#LINESKIP} and dropped.
   */
  public static class Map extends
      Mapper<LongWritable, Text, Text, IntWritable> {

    // Character offsets of the fields inside one input line
    // (start inclusive, end exclusive).
    private static final int YEAR_START = 15;
    private static final int YEAR_END = 19;
    // NOTE(review): in the common NCDC layout the sign sits at offset 87 and
    // offset 92 is a quality code; confirm 88-93 matches this job's input.
    private static final int TEMPERATURE_START = 88;
    private static final int TEMPERATURE_END = 93;

    /**
     * Emits the year and temperature parsed from one input line.
     *
     * @param key     input key supplied by the framework (not used here)
     * @param value   one line of input text
     * @param context task context used to emit output and update counters
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String line = value.toString();

      // String.substring throws StringIndexOutOfBoundsException (not
      // ArrayIndexOutOfBoundsException) on a short line, so check the length
      // up front rather than relying on an exception handler.
      if (line.length() < TEMPERATURE_END) {
        context.getCounter(Counter.LINESKIP).increment(1);
        return;
      }

      int temperature;
      try {
        temperature = Integer.parseInt(
            line.substring(TEMPERATURE_START, TEMPERATURE_END));
      } catch (NumberFormatException e) {
        // A non-numeric temperature field is bad data, not a reason to fail
        // the whole task: count the line and move on.
        context.getCounter(Counter.LINESKIP).increment(1);
        return;
      }

      String year = line.substring(YEAR_START, YEAR_END);
      context.write(new Text(year), new IntWritable(temperature));
    }
  }

  /**
   * Reduce task: emits the largest value among all values received for a key.
   * Also used as the combiner.
   */
  public static class Reduce extends
      Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Writes the maximum of {@code values} for {@code key}.
     *
     * @param key     the grouping key (the year emitted by the mapper)
     * @param values  all values grouped under {@code key}
     * @param context task context used to emit the result
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void reduce(Text key, Iterable<IntWritable> values,
        Context context) throws IOException, InterruptedException {
      int maxValue = Integer.MIN_VALUE;
      for (IntWritable var : values) {
        maxValue = Math.max(maxValue, var.get());
      }
      context.write(key, new IntWritable(maxValue));
    }
  }

  /**
   * Command-line entry point: validates the arguments, runs the job through
   * {@link ToolRunner}, prints start/end time and elapsed minutes, and exits
   * with the job's status code.
   *
   * @param args input path and output path
   * @throws Exception if the job fails with an exception
   */
  public static void main(String[] args) throws Exception {
    // Exactly two arguments are required; otherwise print usage and exit.
    if (args.length != 2) {
      System.err.println("");
      System.err.println("Usage: MaxTemeratureMapper< input path > < output path > ");
      System.err.println("Example: hadoop jar ~/MaxTemeratureMapper.jar hdfs://10.3.19.199:9000/user/hadoop/TempratureData/  hdfs://10.3.19.199:9000/user/hadoop/TempratureData/Temerature_1_output");
      System.err.println("Counter:");
      System.err.println("\t" + "LINESKIP" + "\t" + "Lines which are too short");
      System.exit(-1);
    }

    // Record the start time.
    DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    Date start = new Date();

    // Run the job.
    int res = ToolRunner.run(new Configuration(), new MaxTemeratureMapper(), args);

    // Report the elapsed time in minutes.
    Date end = new Date();
    float time = (float) ((end.getTime() - start.getTime()) / 60000.0);
    System.out.println("任务开始:" + formatter.format(start));
    System.out.println("任务结束:" + formatter.format(end));
    System.out.println("任务耗时:" + String.valueOf(time) + " 分钟");
    System.exit(res);
  }
}

// Related reading: weather data analysis code (你可能感兴趣的: 气象数据分析代码)