0.MaxValue:要求输出cite75_99.txt中最大的CITED值:
要点:
1.Mapper只输出它所处理的数据中的最大值。(重写cleanup()函数)
2.设置Reducer数目为一个 -D mapred.reduce.tasks=1,同时也只输出所处理的最大值。(重写cleanup()函数)
3.cleanup()函数:在任务结束时执行一次。详见API。
代码如下:
/* * MaxValues * 函数作用:输出Patent中最大数值 * Author: jokes000 * Date: 2011-12-15 */ import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class MaxValue extends Configured implements Tool { public static class MapClass extends Mapper<LongWritable,Text,Text,Text> { int max = 0; // Map Method public void map(LongWritable key, Text value, Context context){ String[] citation = value.toString().split(",", 2); try { int tmp = Integer.parseInt(citation[0]); if( tmp > max ) max = tmp; } catch(NumberFormatException e){ // do nothing. } //context.write(new Text(citation[0]), new Text(citation[0])); } @Override protected void cleanup(Context context) throws IOException, InterruptedException { context.write(new Text(max+""), new Text(max+"")); } } public static class Reduce extends Reducer<Text,Text,Text,IntWritable> { int max = 0; // Reduce Method public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { //IntWritable[] top = new IntWritable[10]; for(Text value : values) { try { int tmp = Integer.parseInt(value.toString()); if( tmp > max ) max = tmp; } catch(NumberFormatException e) { // do nothing. 
} } //context.write(new Text("Max"), new IntWritable(max)); } @Override protected void cleanup(Context context) throws IOException, InterruptedException { context.write(new Text("Max"), new IntWritable(max)); } } @Override public int run(String[] arg0) throws Exception { Job job = new Job(); job.setJarByClass(MaxValue.class); FileInputFormat.addInputPath(job, new Path(arg0[0])); FileOutputFormat.setOutputPath(job, new Path(arg0[1])); job.setMapperClass(MapClass.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.waitForCompletion(true); return 0; } public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new MaxValue(), args); System.exit(res); } }
1.Top K Values: 要求输出apat63_99.txt中的第9列CLAIMS值的最大的K个值:
要点:
1.Mapper只输出它所处理的数据中的最大的K个值。(重写 cleanup()函数)
2.设置Reducer数目为1 -D mapred.reduce.tasks=1,同时对Mapper中输出进行排序,输出最大的K个值(重写 cleanup()函数)
代码如下:
/* * TopKValues * 函数作用:输出CLAIMS中最大的几个数值 * Author: jokes000 * Date: 2011-12-15 */ import java.io.IOException; import java.util.Arrays; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class TopKValues extends Configured implements Tool { public static class MapClass extends Mapper<LongWritable,Text,Text,IntWritable> { // 全局变量 int len; // K值 int[] top; // 用于保存的数组 // Map Method public void map(LongWritable key, Text value, Context context) { String[] fields = value.toString().split(",",-20); try { int claims = Integer.parseInt(fields[8]); add(claims); } catch(NumberFormatException e) { // do nothing.. 
} } private void add(int value) { top[0] = value; Arrays.sort(top); } @Override protected void setup(Context context) { // 获取设置的"K"值,若没有K值,则设置该值为10 len = context.getConfiguration().getInt("K", 10); top = new int[len+1]; } @Override protected void cleanup(Context context) throws IOException, InterruptedException { for( int i = 1; i <= len; ++ i ) { context.write(new Text(top[i]+""), new IntWritable(top[i])); } } } public static class Reduce extends Reducer<Text,IntWritable,Text,IntWritable> { int[] top; int len; @Override protected void setup(Context context) { len = context.getConfiguration().getInt("K", 10); top = new int[len+1]; } private void add(int value) { top[0] = value; Arrays.sort(top); } // Reduce Method public void reduce(Text key, Iterable<IntWritable> values, Context context) { for(IntWritable value : values) { add(value.get()); } } @Override protected void cleanup(Context context) throws IOException, InterruptedException { for( int i = len; i > 0; -- i ) { context.write(new Text("No."+(len-i+1)), new IntWritable(top[i])); } } } @Override public int run(String[] arg0) throws Exception { Job job = new Job(); job.setJarByClass(TopKValues.class); FileInputFormat.addInputPath(job, new Path(arg0[0])); FileOutputFormat.setOutputPath(job, new Path(arg0[1])); try{ int K = Integer.parseInt(arg0[2]); getConf().setInt("K", K); } catch(NumberFormatException e) { // do nothing.. getConf().setInt("K", 20); } job.setMapperClass(MapClass.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.waitForCompletion(true); return 0; } public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new TopKValues(), args); System.exit(res); } }