1. Why use a Partitioner? Mainly so that the reduce output can be split by key and written to different output files.
2. The results are then easy to inspect at a glance, and simple statistics can be gathered from each output file directly.
1. The input data file contains the following five records, one per line with tab-separated fields (one record is too short, one is too long, and three are normal):
kaka 1 28
hua 0 26
chao 1
tao 1 22
mao 0 29 22
2. The goal is to split the output into three files: normal records in one file, records that are too short in another, and records that are too long in a third.
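To make the classification concrete, here is how each record is tagged (this mapping follows directly from the Map function below, which counts tab-separated fields and treats exactly three as normal):
kaka 1 28    -> 3 fields -> "right"
hua 0 26     -> 3 fields -> "right"
chao 1       -> 2 fields -> "short"
tao 1 22     -> 3 fields -> "right"
mao 0 29 22  -> 4 fields -> "long"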
Code
package com.partition;
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;
// Demonstrates how to use a custom Partitioner
public class MyPartitioner {
    // Map: tag each record as "long", "short", or "right" according to its field count
    public static class MyMap extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {
        public void map(LongWritable key, Text value,
                OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            // Fields are tab-separated; a well-formed record has exactly 3 fields
            String[] fields = value.toString().split("\t");
            Text word1 = new Text();
            Text word2 = new Text();
            if (fields.length > 3) {
                word1.set("long");      // too many fields
            } else if (fields.length < 3) {
                word1.set("short");     // too few fields
            } else {
                word1.set("right");     // exactly 3 fields
            }
            word2.set(value);           // keep the original record as the value
            output.collect(word1, word2);
        }
    }
    // Partitioner: route each tag to a fixed reduce task; with three reduce tasks,
    // each tag therefore ends up in its own output file. The % numPartitions keeps
    // the index valid even if the job is run with fewer reducers.
    public static class MyPartitionerPar implements Partitioner<Text, Text> {
        @Override
        public int getPartition(Text key, Text value, int numPartitions) {
            int result = 0;
            if (key.toString().equals("long")) {
                result = 0 % numPartitions;    // "long" records -> reducer 0
            } else if (key.toString().equals("short")) {
                result = 1 % numPartitions;    // "short" records -> reducer 1
            } else if (key.toString().equals("right")) {
                result = 2 % numPartitions;    // "right" records -> reducer 2
            }
            return result;
        }

        @Override
        public void configure(JobConf conf) {
            // no extra configuration needed
        }
    }
    // Reduce: identity pass-through; the partitioner has already routed each tag
    // to its own reduce task, so each category lands in a separate part file
    public static class MyReduce extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterator<Text> values,
                OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
            while (values.hasNext()) {
                // Copy via new Text(Text); calling getBytes() on the reused value
                // object can expose stale bytes past the valid length
                output.collect(key, new Text(values.next()));
            }
        }
    }
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(MyPartitioner.class);
        conf.setJobName("MyPartitioner");
        // three reduce tasks: one per category ("long", "short", "right")
        conf.setNumReduceTasks(3);

        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(Text.class);
        conf.setPartitionerClass(MyPartitionerPar.class);

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
        conf.setMapperClass(MyMap.class);
        conf.setReducerClass(MyReduce.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf);
    }
}
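Usage note (the jar name and HDFS paths below are illustrative, not from the original post): package the class into a jar and submit it with input and output paths. With three reduce tasks, the old API writes one part file per reducer:

hadoop jar mypartitioner.jar com.partition.MyPartitioner /user/hadoop/partition/in /user/hadoop/partition/out

Expected output files:
part-00000  ->  the "long" record (mao 0 29 22)
part-00001  ->  the "short" record (chao 1)
part-00002  ->  the three "right" records

Each output line carries the tag as the key followed by the original record, since the reducer emits (key, record) pairs. If the job were run with fewer than three reducers, the % numPartitions fallback in getPartition would fold categories into the same file; for per-key file naming without dedicating one reducer to each key, the old API's MultipleOutputs is the usual alternative.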