java 气象数据_气象数据集例子Java程序代码

import java.io.*;

import org.apache.*;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapred.FileInputFormat;

import org.apache.hadoop.mapred.FileOutputFormat;

import org.apache.hadoop.mapred.JobClient;

import org.apache.hadoop.mapred.JobConf;

public class MaxTemperature{

public static void main(String[]args) throws IOException{

//args=new String[2];

//args[0]="/home/yukjin/Downsload/1901";

//args[1]="output";

if(args.length!=2){

System.err.println("Usage:MaxTemperature

System.exit(-1);

}

JobConf conf=new JobConf(MaxTemperature.class);//JobConf指定作业执行规范,可以使用它控制整个作业的运行

conf.setJobName("Max temperature");

FileInputFormat.addInputPath(conf,new Path(args[0]));//指定文件输入路径,路径既可以是单个文件也可以是某个目录,也可多次调用实现多路径输入

FileOutputFormat.setOutputPath(conf,new Path(args[1]));//指定文件输出路径,执行前该路径不能存在,负责hadoop拒绝运行该任务

conf.setMapperClass(MaxTemperatureMapper.class);

conf.setReducerClass(MaxTemperatureReducer.class);

conf.setOutputKeyClass(Text.class);

conf.setOutputValueClass(IntWritable.class);

JobClient.runJob(conf);

}

}

import java.io.*;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapred.MapReduceBase;

import org.apache.hadoop.mapred.Mapper;

import org.apache.hadoop.mapred.OutputCollector;

import org.apache.hadoop.mapred.Reporter;

public class MaxTemperatureMapper extends MapReduceBase implements Mapper{

private static final int MISSING=9999;

public void map(LongWritable key,Text value,OutputCollectoroutput,Reporter reporter)throws IOException{

String line=value.toString();//Text类型转换为String类型

String year=line.substring(15,19);//截取年份

int airtemperature;

if(line.charAt(87)=='+'){

airtemperature=Integer.parseInt(line.substring(88,92));

}

else{

airtemperature=Integer.parseInt(line.substring(87,92));

}

String quality=line.substring(92,93);

if(airtemperature!=MISSING&&quality.matches("[01459]")){

output.collect(new Text(year),new IntWritable(airtemperature));

}

}

}

Mapper接口是一个泛型类型,需要指定4个类型参数,分别对应Map函数的输入键、输入值、输出键和输出值。此例中输入键为LongWritable(长整型偏移量),输入值为Text(一行文本),输出键为Text(年份),输出值为IntWritable(气温)。

Hadoop自身提供了一套可优化网络序列化传输的基本类型,而不直接使用Java的基本类型,这些类型可以在org.apache.hadoop.io包中找到。

import java.io.*;

import java.util.Iterator;

import org.apache.hadoop.mapred.MapReduceBase;

import org.apache.hadoop.mapred.OutputCollector;

import org.apache.hadoop.mapred.Reducer;

import org.apache.hadoop.mapred.Reporter;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

public class MaxTemperatureReducer extends MapReduceBase implements Reducer{

public void reduce(Text key,Iteratorvalues,OutputCollectoroutput,Reporter reporter)throws IOException{

int maxValue=Integer.MIN_VALUE;

while(values.hasNext()){

maxValue=Math.max(maxValue,values.next().get());

}

output.collect(key,new IntWritable(maxValue));

}

}

Reducer接口同样也是泛型类型,需要指定4个类型参数,分别对应Reduce函数的输入键、输入值、输出键和输出值。Reduce函数的输入键值类型必须与Map函数的输出键值类型匹配。

你可能感兴趣的:(java,气象数据)