一 OutputFormat作用
1校验job中指定的输出路径是否不存在(若已存在则抛出异常, 防止覆盖已有结果)
2将结果写入输出文件
二 OutputFormat的实现
2.1DBOutputFormat: 发送Reduce结果到SQL表中
2.2FileOutputFormat: 将Reduce结果写入文件中
2.2.1MapFileOutputFormat: 主要是处理MapFile(特殊的SequenceFile)的输出
2.2.2SequenceFileOutputFormat: 主要是处理SequenceFile的输出
2.2.3TextOutputFormat: 主要是处理普通文本的输出,也是默认实现
2.3FilterOutputFormat:主要就是方便包装其他OutputFormat(没用过)
2.4NullOutputFormat: 把所有的输出放到/dev/null(没用过)
三 MultipleOutputs
在有些场景中,我们需要将Map-Reduce结果输出到多个文件中,我们就可以使用MultipleOutputs这个类。
MultipleOutputs的使用步骤:
3.1我们需要在Mapper的setup方法中实例化MultipleOutputs
3.2在map方法中使用MultipleOutputs对象进行write, 并且需要把你的文件名传入write方法中
3.3在完成后需要在cleanup方法中关闭MultipleOutputs
3.4最后生成的结果就是 你传入的文件名-m|r-00000这样的序列
public class OutputMultipleFile extends Configured implements Tool{
public static class OutputMultipleMapper extendsMapper
private Text key1 = new Text();
private Text value1 = new Text();
private MultipleOutputs
@Override
protected void cleanup(Mapper
throws IOException, InterruptedException {
super.cleanup(context);
mos.close();
}
@Override
protected void setup(Mapper
throws IOException, InterruptedException {
super.setup(context);
mos = new MultipleOutputs
}
@Override
protected void map(LongWritable key, Text value, Mapper
throws IOException, InterruptedException {
if (value == null) {
return;
}
StringTokenizer tokenizer = new StringTokenizer(value.toString());
while (tokenizer.hasMoreTokens()) {
String token = tokenizer.nextToken();
key1.set(token);
value1.set("=>"+key1);
mos.write(key1, value1, generateFileName(key1));
}
}
private String generateFileName(Text key){
if (key == null) {
return "default";
}
int len = key.toString().length();
if (len <5) {
return "primary";
}
return "extended";
}
}
public int run(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
//对数组长度进行校验
if (otherArgs.length < 2) {
System.err.println("Usage:wordcount
System.exit(2);
}
Job job = Job.getInstance(conf,this.getClass().getSimpleName());
//设置要运行的任务
job.setJarByClass(OutputMultipleFile.class);
//设置输入路径
Path in = new Path(args[0]);
FileInputFormat.addInputPath(job, in);
//设置输出路径
Path out = new Path(args[1]);
FileOutputFormat.setOutputPath(job, out);
//设置要运行的Mapper
job.setMapperClass(OutputMultipleMapper.class);
//设置Mapper的输出key和输出value的类型
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setNumReduceTasks(0);
boolean isSuccess = job.waitForCompletion(Boolean.TRUE);
return isSuccess ? 0 : 1;
}
public static void main(String[] args) throws Exception {
int num = new Random().nextInt(1000);
if (args == null || args.length == 0) {
args = new String[]{
"hdfs://hdfs-cluster/user/hadoop/input",
"hdfs://hdfs-cluster/user/hadoop/output"+num
};
}
int status = new OutputMultipleFile().run(args);
System.exit(status);
}
}