DataJoin 实例

1、《Hadoop 实战》书上的例子并不能运行成功。以下是我修改后的代码,可以运行成功:

 

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.contrib.utils.join.DataJoinMapperBase;
import org.apache.hadoop.contrib.utils.join.DataJoinReducerBase;
import org.apache.hadoop.contrib.utils.join.TaggedMapOutput;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class DataJoin extends Configured implements Tool {

	/**
	 * Mapper side of the reduce-side join: tags every record with the name of
	 * the file it came from, and emits the first CSV column as the group key.
	 */
	public static class DataJoinMapper extends DataJoinMapperBase {

		@Override
		protected Text generateGroupKey(TaggedMapOutput aRecord) {
			// The join key is the first comma-separated field of the record
			// (e.g. the customer id shared by both input files).
			return new Text(aRecord.getData().toString().split(",")[0]);
		}

		@Override
		protected Text generateInputTag(String inputFiles) {
			// Use the input file name itself as the tag identifying the data source.
			return new Text(inputFiles);
		}

		@Override
		protected TaggedMapOutput generateTaggedMapOutput(Object value) {
			TaggedMapOutput ret = new TaggedWritable((Text) value);
			ret.setTag(this.inputTag);
			return ret;
		}
	}

	/**
	 * A {@link TaggedMapOutput} whose payload is a generic {@link Writable}
	 * (a {@code Text} line in this job). The inherited {@code tag} records
	 * which input file the payload came from.
	 */
	public static class TaggedWritable extends TaggedMapOutput {

		private Writable data;

		/**
		 * No-arg constructor required by Hadoop's reflective deserialization;
		 * fields must be non-null so {@link #readFields} can populate them.
		 */
		public TaggedWritable() {
			this.tag = new Text("");
			this.data = new Text("");
		}

		public TaggedWritable(Writable data) {
			this.tag = new Text("");
			this.data = data;
		}

		@Override
		public void write(DataOutput out) throws IOException {
			this.tag.write(out);
			this.data.write(out);
		}

		@Override
		public void readFields(DataInput in) throws IOException {
			// BUG FIX: fields must be read back in exactly the order write()
			// emitted them (tag first, then data). The original read data
			// before tag, which silently swapped the two Text fields on
			// deserialization between map and reduce.
			this.tag.readFields(in);
			this.data.readFields(in);
		}

		@Override
		public Writable getData() {
			return data;
		}
	}

	/**
	 * Reducer side of the join: for each group key, concatenates the non-key
	 * portion of one record from each source into a single CSV line.
	 */
	public static class DataJoinReducer extends DataJoinReducerBase {

		@Override
		protected TaggedMapOutput combine(Object[] tags, Object[] values) {
			// Inner join: drop keys that do not appear in both inputs.
			if (tags.length < 2) {
				return null;
			}
			StringBuilder joinedStr = new StringBuilder();
			for (int i = 0; i < values.length; i++) {
				if (i > 0) {
					joinedStr.append(",");
				}
				TaggedWritable tw = (TaggedWritable) values[i];
				String line = ((Text) tw.getData()).toString();
				// Split off the key (first field); keep the rest verbatim.
				String[] tokens = line.split(",", 2);
				// Guard against malformed lines that have no comma at all,
				// which would otherwise throw ArrayIndexOutOfBoundsException.
				if (tokens.length > 1) {
					joinedStr.append(tokens[1]);
				}
			}
			TaggedWritable ret = new TaggedWritable(new Text(joinedStr.toString()));
			ret.setTag((Text) tags[0]);
			return ret;
		}
	}

	/**
	 * Configures and submits the join job using the classic {@code mapred} API
	 * (required by the {@code contrib.utils.join} framework).
	 *
	 * @param args unused; input/output paths are hard-coded below
	 * @return 0 on successful job completion
	 */
	@Override
	public int run(String[] args) throws Exception {
		JobConf job = new JobConf(getConf());
		job.setJarByClass(getClass());
		job.setJobName("datajoin");

		// Both data sources are added as inputs; the mapper tags each record
		// with the file it came from.
		Path in1 = new Path("/join/customers/");
		Path in2 = new Path("/join/orders/");
		FileInputFormat.addInputPath(job, in1);
		FileInputFormat.addInputPath(job, in2);
		Path out = new Path("/join/output/");
		FileOutputFormat.setOutputPath(job, out);

		job.setMapperClass(DataJoinMapper.class);
		job.setReducerClass(DataJoinReducer.class);

		job.setInputFormat(TextInputFormat.class);
		job.setOutputFormat(TextOutputFormat.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(TaggedWritable.class);

		// Emit "key,value" rather than the default tab-separated output.
		job.set("mapred.textoutputformat.separator", ",");

		JobClient.runJob(job);
		return 0;
	}

	public static void main(String[] args) throws Exception {
		int res = ToolRunner.run(new DataJoin(), args);
		System.exit(res);
	}
}

 可以参考:

  http://www.cnblogs.com/aprilrain/archive/2013/01/28/2880460.html

  http://blog.csdn.net/jokes000/article/details/7080551

你可能感兴趣的:(JOIN)