hadoop 气象数据分析

 学习hadoop几天了,配置了三天终端,运行了几个例子。现在自己写一个程序,根据气象数据,计算当年的最高气温。
模拟美国气象局发布的数据。数据格式为:0000001010195005+00001+99999999(其中第11~14位为年份,第17~22位为带符号的气温)。以1950为键,00001为值。
模拟数据为:
0000001010195005+00001+99999999
0000001010195005+00200+99999999
0000001010195005-00040+99999999
0000001010195005-00030+99999999
0000001010195005+00500+99999999
0000001010195005+00303+99999999
0000001010195005-00435+99999999
0000001010195005+00342+99999999
0000001010195005+00034+99999999
0000001010195005-00023+99999999
0000001010195005+00034+99999999
0000001010195005+00234+99999999
0000001010195005-00034+99999999
0000001010195005+00022+99999999
0000001010195005+00940+99999999
0000001010195005-00034+99999999
0000001010195005+00244+99999999
0000001010195005-00034+99999999
0000001010195005-00022+99999999
0000001010195005+00090+99999999
0000001010195005-00240+99999999

0000001010195105+00001+99999999
0000001010195105+00200+99999999
0000001010195105-00040+99999999
0000001010195105-00030+99999999
0000001010195105+00500+99999999
0000001010195105+00303+99999999
0000001010195105-00435+99999999
0000001010195105+00342+99999999
0000001010195105+00034+99999999
0000001010195105-00023+99999999
0000001010195105+00034+99999999
0000001010195105+00234+99999999
0000001010195105-00034+99999999
0000001010195105+00022+99999999
0000001010195105+00940+99999999
0000001010195105-00034+99999999
0000001010195105+00244+99999999
0000001010195105-00034+99999999
0000001010195105-00022+99999999
0000001010195105+00090+99999999
0000001010195105-00240+99999999
其中1950年和1951年的数据是相同的,目的是为了测试两年的最高气温的计算结果是否相同。

下面列举代码:

package climate;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper: extracts (year, temperature) from one fixed-width weather record.
 *
 * Record layout (0-based indices): chars 10-13 = year, char 16 = sign
 * ('+' or '-'), chars 17-21 = five temperature digits.
 * Emits the year as a Text key and the signed temperature as an IntWritable.
 */
public class MaxTemeratureMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Reused across map() calls to avoid allocating a new writable per record.
    private final Text year = new Text();
    private final IntWritable temperature = new IntWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();

        // A well-formed record needs at least 22 chars (temperature ends at
        // index 21); the old "< 14" guard let short lines reach substring(17, 22)
        // and throw StringIndexOutOfBoundsException. Blank lines are skipped here.
        if (line.length() < 22) {
            System.out.println("malformed record (shorter than 22 chars): " + line);
            return;
        }

        year.set(line.substring(10, 14));

        // Include the sign at index 16 so negative readings stay negative;
        // parsing substring(17, 22) alone silently turned e.g. -00435 into 435.
        int temp;
        if (line.charAt(16) == '+') {
            temp = Integer.parseInt(line.substring(17, 22));
        } else {
            temp = Integer.parseInt(line.substring(16, 22));
        }
        temperature.set(temp);

        context.write(year, temperature);
    }
}

package climate;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer (also used as combiner): emits the maximum temperature per year.
 *
 * The raw {@code extends Reducer} form was a bug: under type erasure the
 * framework expects {@code reduce(Object, Iterable, Context)}, so the typed
 * {@code reduce(Text, ...)} method never overrode it and @Override failed.
 * Declaring the generic parameters makes the override take effect.
 */
public class MaxTemperatureReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    // Reused across reduce() calls to avoid per-key allocation.
    private final IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Start below any possible reading so the first value always wins.
        int maxValue = Integer.MIN_VALUE;
        for (IntWritable value : values) {
            maxValue = Math.max(maxValue, value.get());
        }
        result.set(maxValue);
        context.write(key, result);
    }
}

package climate;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Driver: configures and submits the max-temperature MapReduce job.
 *
 * Usage: climate &lt;input-dir&gt; &lt;output-dir&gt;
 */
public class Climate {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Strip generic Hadoop options (-D, -files, ...) from the argument list.
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        if (otherArgs.length != 2) {
            System.err.println("Usage: climate in out directory");
            System.exit(2);
        }

        long startTime = System.currentTimeMillis();

        Job job = new Job(conf, "climate");
        job.setJarByClass(Climate.class);
        job.setMapperClass(MaxTemeratureMapper.class);
        // max() is associative and commutative, so the reducer is safe as a combiner.
        job.setCombinerClass(MaxTemperatureReduce.class);
        job.setReducerClass(MaxTemperatureReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        // Submit exactly once. The original called waitForCompletion() twice
        // (once for timing, once for the exit code); a second call on a
        // completed job throws IllegalStateException in Hadoop 2+.
        boolean success = job.waitForCompletion(true);
        if (success) {
            System.out.println("dealing time:" + (System.currentTimeMillis() - startTime) + "ms");
        }

        System.exit(success ? 0 : 1);
    }
}

(3)运行结果
1950    940
1951    940


你可能感兴趣的:(Hadoop,hadoop,数据分析,string,exception,class,path)