要点:
类型比较在hadoop的mapreduce中非常重要,主要用来比较keys;
hadoop中的RawComparator<T>接口继承自java的comparator, 主要用来比较序列化的objects;
举例来说 比较object: compare(new IntWritable(21), new IntWritable(998)); 比较serialized representations: compare(serialize(new IntWritable(21)), serialize(new IntWritable(998)))。
提示:继承关系
1.org.apache.hadoop.io
Interface RawComparator<T>
//description
public interface RawComparator<T>
extends Comparator<T>
//method
int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2)
2.org.apache.hadoop.io
Interface WritableComparable<T>
//description
public interface WritableComparable<T>
extends Writable, Comparable<T>
//method
Methods inherited from interface org.apache.hadoop.io.Writable
readFields, write
3.java.lang.Object
|__ org.apache.hadoop.io.WritableComparator
//description
public class WritableComparator
extends Object
implements RawComparator
//methods
int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2)
int compare(Object a, Object b)
int compare(WritableComparable a, WritableComparable b)
static int compareBytes(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2)
4.java.util
Interface Comparator<T>
//description
public interface Comparator<T>
//methods
int compare(T o1, T o2)
boolean equals(Object obj)
代码:
1 import java.lang.Byte; 2 import java.io.DataOutputStream; 3 import java.io.ByteArrayOutputStream; 4 5 import org.apache.hadoop.io.IntWritable; 6 import org.apache.hadoop.io.WritableComparator; 7 import org.apache.hadoop.io.RawComparator; 8 9 public class MyIntWritableComparactor { 10 11 public static byte[] serialize(IntWritable writable) throws Exception { 12 ByteArrayOutputStream out = new ByteArrayOutputStream(); 13 DataOutputStream dataOut = new DataOutputStream(out); 14 writable.write(dataOut); 15 dataOut.close(); 16 return out.toByteArray(); 17 } 18 19 @SuppressWarnings("unchecked") 20 public static void main(String[] args) throws Exception { 21 RawComparator<IntWritable> comparator = WritableComparator.get(IntWritable.class); 22 IntWritable w1 = new IntWritable(13); 23 IntWritable w2 = new IntWritable(12); 24 System.out.println("w1: " + w1 + " w2: " + w2); 25 System.out.println("w1 compare w2 : " + comparator.compare(w1,w2)); 26 27 byte[] b1 = serialize(w1); 28 byte[] b2 = serialize(w2); 29 System.out.println("b1.length: " + b1.length); 30 System.out.println("b2.length: " + b2.length); 31 System.out.println("b1.length compare b2.length: " + comparator.compare(b1, 0, b1.length, b2, 0, b2.length)); 32 33 } 34 }
编译,运行:
//注意我用的是hadoop2.2(源文件名必须与public类名一致) $ source $YARN_HOME/libexec/hadoop-config.sh $ mkdir myclass $ javac -d myclass MyIntWritableComparactor.java $ jar -cvf mycompare.jar -C myclass ./ $ export HADOOP_CLASSPATH=$CLASSPATH:mycompare.jar $ yarn MyIntWritableComparactor
输出:
$ yarn MyIntWritableComparactor w1: 13 w2: 12 w1 compare w2 : 1 b1.length: 4 b2.length: 4 b1.length compare b2.length: 1