1-1、Partitioner 简介
/**
* Decides which partition a key/value record is assigned to.
*
* <p>Concrete subclasses implement {@link #getPartition} to provide the
* mapping.</p>
*
* @param <KEY> the type of the record key.
* @param <VALUE> the type of the record value.
*/
public abstract class Partitioner<KEY, VALUE> {
/**
* Get the partition number for a given key (hence record) given the total
* number of partitions, i.e. the number of reduce tasks for the job.
*
* <p>Typically a hash function on all or a subset of the key.</p>
*
* @param key the key to be partitioned.
* @param value the entry value.
* @param numPartitions the total number of partitions.
* @return the partition number for the <code>key</code>.
*/
public abstract int getPartition(KEY key, VALUE value, int numPartitions);
}
|
/**
 * A {@link Partitioner} that assigns records to partitions based on the
 * key's {@link Object#hashCode()}; the value is not consulted.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class HashPartitioner<K, V> extends Partitioner<K, V> {

  /**
   * Computes the partition from the key's hash code.
   *
   * @param key the record key; its {@code hashCode()} drives the result.
   * @param value the record value (unused).
   * @param numReduceTasks the number of partitions to spread keys over.
   * @return the key's hash with the sign bit cleared, modulo
   *         {@code numReduceTasks}.
   */
  public int getPartition(K key, V value, int numReduceTasks) {
    // Masking with Integer.MAX_VALUE clears the sign bit, so the remainder
    // below can never be negative.
    int nonNegativeHash = key.hashCode() & Integer.MAX_VALUE;
    return nonNegativeHash % numReduceTasks;
  }
}
|
/**
 * Partitions {@link BinaryComparable} keys by hashing a sub-range of the
 * key's bytes.
 *
 * <p>The sub-range is bounded by a left and a right offset. Each offset is
 * normalized as {@code (offset + length) % length}, so an offset in
 * {@code [-length, -1]} counts backwards from the end of the key.</p>
 *
 * <p>NOTE(review): the {@code leftOffset} / {@code rightOffset} fields and
 * the {@code Configurable} methods are not shown in this excerpt; they are
 * presumably populated from the configuration properties below — confirm
 * against the full source.</p>
 *
 * @param <V> the type of the record value.
 */
public class BinaryPartitioner<V> extends Partitioner<BinaryComparable, V>
    implements Configurable {

  /** Configuration key under which the left offset is stored. */
  public static final String LEFT_OFFSET_PROPERTY_NAME =
      "mapreduce.partition.binarypartitioner.left.offset";

  /** Configuration key under which the right offset is stored. */
  public static final String RIGHT_OFFSET_PROPERTY_NAME =
      "mapreduce.partition.binarypartitioner.right.offset";

  /**
   * Stores both offsets in the given configuration.
   *
   * @param conf the configuration to write to.
   * @param left the value for {@link #LEFT_OFFSET_PROPERTY_NAME}.
   * @param right the value for {@link #RIGHT_OFFSET_PROPERTY_NAME}.
   */
  public static void setOffsets(Configuration conf, int left, int right) {
    conf.setInt(LEFT_OFFSET_PROPERTY_NAME, left);
    conf.setInt(RIGHT_OFFSET_PROPERTY_NAME, right);
  }

  /**
   * Stores the left offset in the given configuration.
   *
   * @param conf the configuration to write to.
   * @param offset the value for {@link #LEFT_OFFSET_PROPERTY_NAME}.
   */
  public static void setLeftOffset(Configuration conf, int offset) {
    conf.setInt(LEFT_OFFSET_PROPERTY_NAME, offset);
  }

  /**
   * Stores the right offset in the given configuration.
   *
   * @param conf the configuration to write to.
   * @param offset the value for {@link #RIGHT_OFFSET_PROPERTY_NAME}.
   */
  public static void setRightOffset(Configuration conf, int offset) {
    conf.setInt(RIGHT_OFFSET_PROPERTY_NAME, offset);
  }

  /**
   * Hashes the configured byte range of the key and maps it to a partition.
   *
   * @param key the binary key whose bytes are hashed.
   * @param value the record value (unused).
   * @param numPartitions the total number of partitions.
   * @return {@code 0} for an empty key; otherwise the hash of the selected
   *         bytes with the sign bit cleared, modulo {@code numPartitions}.
   */
  @Override
  public int getPartition(BinaryComparable key, V value, int numPartitions) {
    int length = key.getLength();
    if (length == 0) {
      // An empty key has no bytes to hash, and "% length" below would throw
      // ArithmeticException (division by zero); send it to partition 0.
      return 0;
    }
    // Normalize offsets so that negative values index from the end of the key.
    int leftIndex = (leftOffset + length) % length;
    int rightIndex = (rightOffset + length) % length;
    // Both indices are inclusive, hence the "+ 1" when computing the byte count.
    int hash = WritableComparator.hashBytes(key.getBytes(),
        leftIndex, rightIndex - leftIndex + 1);
    // Clear the sign bit so the remainder is never negative.
    return (hash & Integer.MAX_VALUE) % numPartitions;
  }
}
|
/**
 * Partitions records by hashing selected byte ranges of the key's UTF-8
 * string form.
 *
 * <p>NOTE(review): {@code keyFieldHelper} and the {@code Configurable}
 * methods are not shown in this excerpt; the key specs are presumably
 * derived from {@link #PARTITIONER_OPTIONS} — confirm against the full
 * source.</p>
 *
 * @param <K2> the type of the record key.
 * @param <V2> the type of the record value.
 */
public class KeyFieldBasedPartitioner<K2, V2> extends Partitioner<K2, V2>
    implements Configurable {

  private static final Log LOG = LogFactory.getLog(
      KeyFieldBasedPartitioner.class.getName());

  /** Configuration key for the partitioner options. */
  public static String PARTITIONER_OPTIONS =
      "mapreduce.partition.keypartitioner.options";

  private int numOfPartitionFields;

  /**
   * Picks the partition for a record.
   *
   * <p>When the helper reports no key specs, the hash of
   * {@code key.toString()} is used. Otherwise the key is encoded as UTF-8
   * and, for every key spec, the byte range reported by the helper is folded
   * into a running hash.</p>
   *
   * @param key the record key; only its {@code toString()} form is used.
   * @param value the record value (unused).
   * @param numReduceTasks the total number of partitions.
   * @return the partition index; {@code 0} when key specs exist and the
   *         encoded key is empty.
   */
  public int getPartition(K2 key, V2 value, int numReduceTasks) {
    List<KeyDescription> specs = keyFieldHelper.keySpecs();
    if (specs.isEmpty()) {
      // No field selection configured: hash the whole key string.
      return getPartition(key.toString().hashCode(), numReduceTasks);
    }

    byte[] utf8Key;
    try {
      utf8Key = key.toString().getBytes("UTF-8");
    } catch (UnsupportedEncodingException e) {
      throw new RuntimeException("The current system does not " +
          "support UTF-8 encoding!", e);
    }

    // An empty key always lands in partition 0.
    if (utf8Key.length == 0) {
      return 0;
    }

    int[] wordLengths =
        keyFieldHelper.getWordLengths(utf8Key, 0, utf8Key.length);
    int runningHash = 0;
    for (KeyDescription spec : specs) {
      int from = keyFieldHelper.getStartOffset(utf8Key, 0, utf8Key.length,
          wordLengths, spec);
      // A negative start offset means this spec matched nothing; skip it.
      if (from >= 0) {
        int to = keyFieldHelper.getEndOffset(utf8Key, 0, utf8Key.length,
            wordLengths, spec);
        runningHash = hashCode(utf8Key, from, to, runningHash);
      }
    }
    return getPartition(runningHash, numReduceTasks);
  }

  /**
   * Folds the bytes {@code b[start..end]} (both inclusive) into a hash using
   * the recurrence {@code hash = 31 * hash + b[i]}.
   *
   * @param b the byte array to read from.
   * @param start index of the first byte to include.
   * @param end index of the last byte to include.
   * @param currentHash the hash value to continue from.
   * @return the updated hash; {@code currentHash} unchanged when
   *         {@code start > end}.
   */
  protected int hashCode(byte[] b, int start, int end, int currentHash) {
    int hash = currentHash;
    int i = start;
    while (i <= end) {
      hash = 31 * hash + b[i];
      i++;
    }
    return hash;
  }

  /**
   * Maps a hash value to a partition index.
   *
   * @param hash the hash value, possibly negative.
   * @param numReduceTasks the total number of partitions.
   * @return the hash with its sign bit cleared, modulo
   *         {@code numReduceTasks}.
   */
  protected int getPartition(int hash, int numReduceTasks) {
    int nonNegativeHash = hash & Integer.MAX_VALUE;
    return nonNegativeHash % numReduceTasks;
  }
}
|