在新 API 中，org.apache.hadoop.mapreduce.lib.output.MultipleOutputs 整合了上述旧 API 中两个类（MultipleOutputs 与 MultipleOutputFormat）的功能。
MultipleOutputs 的作用：
MultipleOutputs.write(namedOutput, key, value, baseOutputPath);
使用命名输出时，还需要在作业配置阶段调用：
MultipleOutputs.addNamedOutput(job, namedOutput, outputFormatClass, keyClass, valueClass)
例子
package example;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
/**
 * 多路输出示例：使用新 API 的 MultipleOutputs，在 reduce 阶段按 key 将结果写入不同的输出路径。
 * @author lijl
 *
 */
public class MultiOutputFileMR {

    /**
     * Mapper: splits each input line on the literal '|' character and emits
     * the first field as the key and the second field as the value.
     */
    static class MultiOutputFileMapper extends Mapper<LongWritable, Text, Text, Text> {
        // Reusable Writables — standard Hadoop idiom to avoid allocating
        // two new Text objects per input record.
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // "\\|" because String.split takes a regex and '|' is a metacharacter.
            String[] fields = value.toString().split("\\|");
            if (fields.length < 2) {
                // Skip malformed lines instead of crashing with
                // ArrayIndexOutOfBoundsException (original indexed fields[1] unguarded).
                return;
            }
            outKey.set(fields[0]);
            outValue.set(fields[1]);
            // Propagate IOException/InterruptedException per the Mapper contract
            // instead of swallowing them with printStackTrace().
            context.write(outKey, outValue);
        }
    }

    /**
     * Reducer: writes every (key, value) pair through MultipleOutputs, using
     * "/sina_yq/path7/&lt;key&gt;" as the base output path so records are
     * partitioned into one output file per key.
     */
    static class MultiOutputFileReducer extends Reducer<Text, Text, Text, Text> {
        private MultipleOutputs<Text, Text> collector;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            collector = new MultipleOutputs<Text, Text>(context);
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Guard against setup() having failed before collector was assigned.
            if (collector != null) {
                collector.close();
            }
        }

        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text value : values) {
                // Alternative usages (require matching addNamedOutput calls in main):
                //   collector.write("a1", key, value);
                //   collector.write("a4", key, value, "/sina_yq/path7/");
                // Here the 3-arg form writes directly to a per-key base path.
                collector.write(key, value, "/sina_yq/path7/" + key.toString());
            }
        }
    }

    /**
     * NOTE(review): unused in this job — it extends the OLD mapred-API
     * MultipleTextOutputFormat and is never registered with the Job below.
     * Kept only as a reference for the legacy per-key-file technique.
     */
    static class MultiOutPutTestFormat extends MultipleTextOutputFormat<Text, Text> {
        protected String generateFileNameForKeyValue(Text key, Text value, String name) {
            return key.toString();
        }
    }

    /**
     * Configures and submits the job: reads two input paths, writes to
     * /cs/path7, and uses a single reducer so all keys share one task.
     */
    public static void main(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        // Job.getInstance replaces the deprecated new Job(conf, name) constructor.
        Job job = Job.getInstance(conf, "MultiPathFileInput");
        job.setJarByClass(MultiOutputFileMR.class);
        // Comma-separated form of addInputPath for multiple inputs.
        FileInputFormat.addInputPaths(job,
                "hdfs://RS5-112:9000/sina_yq/path1,hdfs://RS5-112:9000/cs/path2");
        FileOutputFormat.setOutputPath(job, new Path("hdfs://RS5-112:9000/cs/path7"));
        // Named outputs would be registered here if the reducer used them, e.g.:
        //   MultipleOutputs.addNamedOutput(job, "a1", TextOutputFormat.class,
        //                                  Text.class, Text.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setMapperClass(MultiOutputFileMapper.class);
        job.setReducerClass(MultiOutputFileReducer.class);
        job.setNumReduceTasks(1);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}