hadoop 2.6.0 LightWeightGSet源代码分析

LightWeightGSet的作用用一个数组来存储元素,并且用链表来解决冲突。不能rehash,所以内部数组永远不用改变大小。此类不支持空元素。此类也不是线程安全的。有两个类型参数,第一个用于查找元素,第二个类型参数必须是第一个类型参数的子类,并且必须实现LinkedElement接口。

/**
 * A low memory footprint {@link GSet} implementation,
 * which uses an array for storing the elements
 * and linked lists for collision resolution.
 *
 * No rehash will be performed.
 * Therefore, the internal array will never be resized.
 *
 * This class does not support null element.
 *
 * This class is not thread safe.
 *
 * @param <K> Key type for looking up the elements
 * @param <E> Element type, which must be
 *       (1) a subclass of K, and
 *       (2) implementing {@link LinkedElement} interface.
 */


里面各组件都非常好理解,唯一不好理解的是Iterator,

public class SetIterator implements Iterator<E> {
    /** The starting modification for fail-fast. */
    private int iterModification = modification;
    /** The current index of the entry array. */
    private int index = -1;
    private LinkedElement cur = null;//
    private LinkedElement next = nextNonemptyEntry();//next总是指向下一个元素,在初始化时就完成指下第一个元素。
                                    //在调用next()方法之后,next置为空,直到调用 ensureNext()方法。
 private boolean trackModification = true;

    /** Find the next nonempty entry starting at (index + 1). */
    private LinkedElement nextNonemptyEntry() {
      for(index++; index < entries.length && entries[index] == null; index++);
      return index < entries.length? entries[index]: null;
    }

    private void ensureNext() {
      if (trackModification && modification != iterModification) {
        throw new ConcurrentModificationException("modification=" + modification
            + " != iterModification = " + iterModification);
      }
      if (next != null) {
        return;
      }
      if (cur == null) {
        return;
      }
      next = cur.getNext();
      if (next == null) {
        next = nextNonemptyEntry();
      }
    }

    @Override
    public boolean hasNext() {
      ensureNext();
      return next != null;
    }

    @Override
    public E next() {
      ensureNext();
      if (next == null) {
        throw new IllegalStateException("There are no more elements");
      }
      cur = next;
      next = null;
      return convert(cur);
    }

    @SuppressWarnings("unchecked")
    @Override
    public void remove() {
      ensureNext();
      if (cur == null) {
        throw new IllegalStateException("There is no current element " +
            "to remove");
      }
      LightWeightGSet.this.remove((K)cur);
      iterModification++;
      cur = null;
    }

    public void setTrackModification(boolean trackModification) {
      this.trackModification = trackModification;
    }
  }</span>


computeCapacity()是一个工具方法,用于一定比例的内存的容器,可以存储多少对象。

参数,第一个是占最大内存的百分比,第二个是名称,没有什么用,只用作日志输出。



/**
   * Let t = percentage of max memory.
   * Let e = round(log_2 t).
   * Then, we choose capacity = 2^e/(size of reference),
   * unless it is outside the close interval [1, 2^30].
   */
  public static int computeCapacity(double percentage, String mapName) {
    return computeCapacity(Runtime.getRuntime().maxMemory(), percentage,
        mapName);
  }
  
  @VisibleForTesting
  static int computeCapacity(long maxMemory, double percentage,
      String mapName) {
    if (percentage > 100.0 || percentage < 0.0) {
      throw new HadoopIllegalArgumentException("Percentage " + percentage
          + " must be greater than or equal to 0 "
          + " and less than or equal to 100");
    }
    if (maxMemory < 0) {
      throw new HadoopIllegalArgumentException("Memory " + maxMemory
          + " must be greater than or equal to 0");
    }
    if (percentage == 0.0 || maxMemory == 0) {
      return 0;
    }
    //VM detection
    //See http://java.sun.com/docs/hotspot/HotSpotFAQ.html#64bit_detection
    final String vmBit = System.getProperty("sun.arch.data.model");

    //Percentage of max memory
    final double percentDivisor = 100.0/percentage;
    final double percentMemory = maxMemory/percentDivisor;
    
    //compute capacity
  /*
  详细描述如下:e1应该是以2为base的对数,如percentMemory为1024,结果为10,因为Math类不提供以2为base的对数,
所以采用了间接的方法,先求自然对数,再除以2的自然对数。例System.out.println(Math.log(1024)/Math.log(2));结果为10。
+0.5是为了四舍五入。
假设占用内存为1G,则e1为30.
e2的值,如果系统为32位,则减2,因为2,e2为28,c为2的28次方,为256M个对象,每个对象指针在32位系统中占4字节,总共1G.
如果系统为64位,e2为27,即对象个数为128M,每个对象指针为8字节,所以共占1G.
*/ 
 final int e1 = (int)(Math.log(percentMemory)/Math.log(2.0) + 0.5);
    final int e2 = e1 - ("32".equals(vmBit)? 2: 3);
    final int exponent = e2 < 0? 0: e2 > 30? 30: e2;
    final int c = 1 << exponent;

    LOG.info("Computing capacity for map " + mapName);
    LOG.info("VM type       = " + vmBit + "-bit");
    LOG.info(percentage + "% max memory "
        + StringUtils.TraditionalBinaryPrefix.long2String(maxMemory, "B", 1)
        + " = "
        + StringUtils.TraditionalBinaryPrefix.long2String((long) percentMemory,
            "B", 1));
    LOG.info("capacity      = 2^" + exponent + " = " + c + " entries");
    return c;
  }


你可能感兴趣的:(hadoop 2.6.0 LightWeightGSet源代码分析)