Configuration conf = getConf();
JobConf job = new JobConf(conf);

job.setJobName("ChainJob");
job.setInputFormat(TextInputFormat.class);
job.setOutputFormat(TextOutputFormat.class);

FileInputFormat.setInputPaths(job, in);
FileOutputFormat.setOutputPath(job, out);

// Each addMapper/setReducer call takes the stage's class, its input key/value
// classes, its output key/value classes, a byValue flag, and a JobConf that is
// local to that stage.
JobConf map1Conf = new JobConf(false);
ChainMapper.addMapper(job,
                      Map1.class,
                      LongWritable.class,
                      Text.class,
                      Text.class,
                      Text.class,
                      true,
                      map1Conf);

JobConf map2Conf = new JobConf(false);
ChainMapper.addMapper(job,
                      Map2.class,
                      Text.class,
                      Text.class,
                      LongWritable.class,
                      Text.class,
                      true,
                      map2Conf);

JobConf reduceConf = new JobConf(false);
ChainReducer.setReducer(job,
                        Reduce.class,
                        LongWritable.class,
                        Text.class,
                        Text.class,
                        Text.class,
                        true,
                        reduceConf);

JobConf map3Conf = new JobConf(false);
ChainReducer.addMapper(job,
                       Map3.class,
                       Text.class,
                       Text.class,
                       LongWritable.class,
                       Text.class,
                       true,
                       map3Conf);

JobConf map4Conf = new JobConf(false);
ChainReducer.addMapper(job,
                       Map4.class,
                       LongWritable.class,
                       Text.class,
                       LongWritable.class,
                       Text.class,
                       true,
                       map4Conf);

JobClient.runJob(job);
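The Map1 through Map4 and Reduce classes are not part of the listing above. As a rough guide to how the class arguments in each addMapper() call line up with a mapper's generic signature under the old mapred API, here is a minimal, hypothetical sketch of Map1; the pass-through body is an assumption for illustration, not the original implementation.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical Map1: the generic parameters <LongWritable, Text, Text, Text>
// mirror the four key/value class arguments passed to ChainMapper.addMapper().
public class Map1 extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, Text> {

    public void map(LongWritable key, Text value,
                    OutputCollector<Text, Text> output,
                    Reporter reporter) throws IOException {
        // Placeholder body: emit the input line as both key and value.
        output.collect(value, value);
    }
}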
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import org.apache.hadoop.contrib.utils.join.DataJoinMapperBase;
import org.apache.hadoop.contrib.utils.join.DataJoinReducerBase;
import org.apache.hadoop.contrib.utils.join.TaggedMapOutput;

public class DataJoin extends Configured implements Tool {

    public static class MapClass extends DataJoinMapperBase {

        // Tag each record with its data source, taken from the part of the
        // input file name before the first '-'.
        protected Text generateInputTag(String inputFile) {
            String datasource = inputFile.split("-")[0];
            return new Text(datasource);
        }

        // The join (group) key is the first comma-separated field of the record.
        protected Text generateGroupKey(TaggedMapOutput aRecord) {
            String line = ((Text) aRecord.getData()).toString();
            String[] tokens = line.split(",");
            String groupKey = tokens[0];
            return new Text(groupKey);
        }

        protected TaggedMapOutput generateTaggedMapOutput(Object value) {
            TaggedWritable retv = new TaggedWritable((Text) value);
            retv.setTag(this.inputTag);
            return retv;
        }
    }

    public static class Reduce extends DataJoinReducerBase {

        // Called once per combination of records (one from each source) sharing
        // a group key. Returning null drops the combination, giving inner-join
        // semantics when a key appears in fewer than two sources.
        protected TaggedMapOutput combine(Object[] tags, Object[] values) {
            if (tags.length < 2) return null;
            String joinedStr = "";
            for (int i = 0; i < values.length; i++) {
                if (i > 0) joinedStr += ",";
                TaggedWritable tw = (TaggedWritable) values[i];
                String line = ((Text) tw.getData()).toString();
                // Strip the join key; keep only the remaining fields of the record.
                String[] tokens = line.split(",", 2);
                joinedStr += tokens[1];
            }
            TaggedWritable retv = new TaggedWritable(new Text(joinedStr));
            retv.setTag((Text) tags[0]);
            return retv;
        }
    }

    public static class TaggedWritable extends TaggedMapOutput {

        private Writable data;

        // No-argument constructor required so Hadoop can instantiate this
        // Writable by reflection during deserialization; data is initialized
        // to an empty Text so readFields() has something to read into.
        public TaggedWritable() {
            this.tag = new Text("");
            this.data = new Text("");
        }

        public TaggedWritable(Writable data) {
            this.tag = new Text("");
            this.data = data;
        }

        public Writable getData() {
            return data;
        }

        public void write(DataOutput out) throws IOException {
            this.tag.write(out);
            this.data.write(out);
        }

        public void readFields(DataInput in) throws IOException {
            this.tag.readFields(in);
            this.data.readFields(in);
        }
    }

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();

        JobConf job = new JobConf(conf, DataJoin.class);

        Path in = new Path(args[0]);
        Path out = new Path(args[1]);
        FileInputFormat.setInputPaths(job, in);
        FileOutputFormat.setOutputPath(job, out);

        job.setJobName("DataJoin");
        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TaggedWritable.class);
        job.set("mapred.textoutputformat.separator", ",");

        JobClient.runJob(job);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(),
                                 new DataJoin(),
                                 args);

        System.exit(res);
    }
}
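Because the driver goes through ToolRunner, it takes the input and output paths as its two command-line arguments. Assuming the class is packaged into a jar (the jar name and paths below are illustrative), it would typically be launched like this:

hadoop jar datajoin.jar DataJoin input/ output/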