HashMap 源码分析

HashMap 认识

前面讲了ArrayList和LinkedList，就这两者而言，反映的是两种思想：

ArrayList以数组形式实现，顺序插入、查找快，插入、删除较慢
LinkedList以链表形式实现，顺序插入、查找较慢，插入、删除方便

那么是否有一种数据结构能够结合上面两种的优点呢？有，答案就是HashMap。

HashMap是一种非常常见、方便和有用的集合，是一种键值对（K-V）形式的存储结构

关注点	结论
HashMap是否允许空	key 和 value 都允许为空
HashMap是否允许重复数据	key 不允许重复，value 允许重复
HashMap是否有序	无序的
HashMap是否线程安全	非线程安全

HashMap 构造器

    /**
     * Creates an empty map from an explicit initial capacity and load factor.
     *
     * <p>No bucket array is allocated here. The requested capacity (clamped to
     * MAXIMUM_CAPACITY) is parked in {@code threshold}; {@code put} later passes
     * that value to {@code inflateTable} when it finds the table still empty.
     *
     * @param initialCapacity requested capacity; values above MAXIMUM_CAPACITY are clamped
     * @param loadFactor      load factor stored on this map
     * @throws IllegalArgumentException if {@code initialCapacity} is negative, or if
     *         {@code loadFactor} is NaN or not strictly positive
     */
    public HashMap(int initialCapacity, float loadFactor) {
        if (initialCapacity < 0) {
            throw new IllegalArgumentException("Illegal initial capacity: " + initialCapacity);
        }
        if (Float.isNaN(loadFactor) || loadFactor <= 0) {
            throw new IllegalArgumentException("Illegal load factor: " + loadFactor);
        }

        this.loadFactor = loadFactor;

        // threshold temporarily carries the requested capacity until the table is inflated.
        threshold = Math.min(initialCapacity, MAXIMUM_CAPACITY);

        // Hook for subclasses (the original note names LinkedHashMap as one that overrides it).
        init();
    }

    /**
     * Creates an empty map with the given initial capacity and DEFAULT_LOAD_FACTOR.
     *
     * @param initialCapacity requested initial capacity, forwarded unchanged to the
     *                        two-argument constructor (which validates it)
     */
    public HashMap(int initialCapacity) {
        this(initialCapacity, DEFAULT_LOAD_FACTOR);
    }

    /**
     * Creates an empty map using DEFAULT_INITIAL_CAPACITY and DEFAULT_LOAD_FACTOR.
     */
    public HashMap() {
        this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR);
    }

    /**
     * Creates a map populated from another map.
     *
     * @param m the map whose size drives the initial capacity and which is handed to
     *          putAllForCreate
     */
    public HashMap(Map m) {
        // Request at least DEFAULT_INITIAL_CAPACITY, or m.size() / DEFAULT_LOAD_FACTOR + 1
        // when that is larger.
        this(Math.max((int) (m.size() / DEFAULT_LOAD_FACTOR) + 1,
                      DEFAULT_INITIAL_CAPACITY), DEFAULT_LOAD_FACTOR);
        // Unlike the other constructors, allocate the bucket array immediately;
        // threshold still holds the requested capacity at this point.
        inflateTable(threshold);

        // Presumably copies every mapping of m into this map; the helper is not shown here.
        putAllForCreate(m);
     }

HashMap 添加元素

首先看一下HashMap的一个存储单元Entry：

// Storage cell of the map: one key/value mapping plus a link to the next cell.
// NOTE(review): this excerpt ends before the class body is closed.
static class Entry implements Map.Entry {
        final K key;   // assigned once in the constructor
        V value;       // overwritten by put when the key is already present
        Entry next;    // successor in the same bucket's chain; no predecessor link is kept
        int hash;      // hash recorded for the key; compared before equals during lookups

        /**
         * Creates new entry.
         *
         * @param h hash value to record for the key
         * @param k key of the mapping
         * @param v value of the mapping
         * @param n entry that becomes this entry's successor (may be null)
         */
        Entry(int h, K k, V v, Entry n) {
            value = v;
            next = n;
            key = k;
            hash = h;
        }

从HashMap的Entry看得出，Entry组成的是一个单向链表，因为里面只有Entry的后继Entry，而没有Entry的前驱Entry。

   /**
    * Associates {@code value} with {@code key}.
    *
    * @param key   key of the mapping; {@code null} is routed to putForNullKey
    * @param value value to store
    * @return the value previously stored for an equal key found in the bucket chain,
    *         or {@code null} when a new entry was added through addEntry
    */
   public V put(K key, V value) {
        // The bucket array is allocated lazily on first insertion.
        if (table == EMPTY_TABLE) {
            inflateTable(threshold);
        }

        // A null key cannot be hashed; it has a dedicated path.
        if (key == null) {
            return putForNullKey(value);
        }

        int hash = hash(key);
        int bucket = indexFor(hash, table.length);

        // Walk the chain in the target bucket looking for an equal key.
        Entry node = table[bucket];
        while (node != null) {
            Object existing = node.key;
            if (node.hash == hash && (existing == key || key.equals(existing))) {
                // Key already present: overwrite in place and hand back the old value.
                V previous = node.value;
                node.value = value;
                node.recordAccess(this);
                return previous;
            }
            node = node.next;
        }

        // Structural change: bump modCount (the original note ties this to fail-fast behaviour).
        modCount++;
        addEntry(hash, key, value, bucket);
        return null;
    }

   /**
    * Allocates the bucket array and recomputes {@code threshold}.
    *
    * @param toSize requested size; the actual array length is roundUpToPowerOf2(toSize)
    */
   private void inflateTable(int toSize) {
        int buckets = roundUpToPowerOf2(toSize);

        // threshold becomes the smaller of buckets * loadFactor and MAXIMUM_CAPACITY + 1.
        float scaled = buckets * loadFactor;
        threshold = (int) Math.min(scaled, MAXIMUM_CAPACITY + 1);

        table = new Entry[buckets];
        initHashSeedAsNeeded(buckets);
    }

inflateTable这个方法用于为主干数组table在内存中分配存储空间，通过roundUpToPowerOf2(toSize)可以确保capacity为大于或等于toSize的最接近toSize的2的次幂，比如toSize=13，则capacity=16；toSize=16，capacity=16；toSize=17，capacity=32。

    /**
     * Rounds {@code number} up to a power of two, capped at MAXIMUM_CAPACITY.
     *
     * <p>Expects a non-negative argument; this is not enforced.
     *
     * @param number value to round
     * @return MAXIMUM_CAPACITY when {@code number} is at least that large; 1 when
     *         {@code number} has no bit set; {@code number} itself when exactly one
     *         bit is set; otherwise its highest set bit shifted left by one
     */
    private static int roundUpToPowerOf2(int number) {
        if (number >= MAXIMUM_CAPACITY) {
            return MAXIMUM_CAPACITY;
        }

        int topBit = Integer.highestOneBit(number);
        if (topBit == 0) {
            return 1;
        }

        // More than one bit set means number lies strictly above topBit, so double it.
        if (Integer.bitCount(number) > 1) {
            return topBit << 1;
        }
        return topBit;
    }

roundUpToPowerOf2中的这段处理使得数组长度一定为2的次幂，Integer.highestOneBit是用来获取最左边的bit（其他bit位为0）所代表的数值

/**
 * Adds a new mapping, growing the table first when needed.
 *
 * @param hash        hash already computed for {@code key}
 * @param key         key of the new mapping (may be null)
 * @param value       value of the new mapping
 * @param bucketIndex bucket the entry belongs to in the current table
 */
void addEntry(int hash, K key, V value, int bucketIndex) {
        int effectiveHash = hash;
        int targetBucket = bucketIndex;

        // Grow only when size has reached threshold AND the target bucket is already occupied.
        boolean atThreshold = size >= threshold;
        if (atThreshold && table[bucketIndex] != null) {
            // Double the table, then recompute hash and bucket against the new table.
            resize(2 * table.length);
            effectiveHash = (key == null) ? 0 : hash(key);
            targetBucket = indexFor(effectiveHash, table.length);
        }

        createEntry(effectiveHash, key, value, targetBucket);
    }

 /**
  * Moves all entries into a freshly allocated bucket array of length {@code newCapacity}.
  *
  * <p>If the current table already has MAXIMUM_CAPACITY buckets, no new array is
  * created; {@code threshold} is set to Integer.MAX_VALUE instead.
  *
  * @param newCapacity length of the replacement bucket array
  */
 void resize(int newCapacity) {
        Entry[] current = table;
        if (current.length == MAXIMUM_CAPACITY) {
            threshold = Integer.MAX_VALUE;
            return;
        }

        Entry[] replacement = new Entry[newCapacity];

        // Every entry's bucket index must be recomputed for the new length;
        // the flag from initHashSeedAsNeeded tells transfer whether to recompute hashes too.
        boolean rehash = initHashSeedAsNeeded(newCapacity);
        transfer(replacement, rehash);

        table = replacement;

        // Recompute the resize trigger for the new capacity.
        float scaled = newCapacity * loadFactor;
        threshold = (int) Math.min(scaled, MAXIMUM_CAPACITY + 1);
    }

   void transfer(Entry[] newTable, boolean rehash) {
        int newCapacity = newTable.length;

　　     //for循环中的代码，逐个遍历链表，重新计算索引位置，将老数组数据复制到新数组
          //中去（数组不存储实际数据，所以仅仅是拷贝引用而已）
        for (Entry e : table) {
            while(null != e) {
                Entry next = e.next;
                if (rehash) {
                    e.hash = null == e.key ? 0 : hash(e.key);
                }
                int i = indexFor(e.hash, newCapacity);

　　　　　　　　　 //将当前entry的next链指向新的索引位置,newTable[i]有可能为空，有可能也是个
                  //entry链，如果是entry链，直接在链表头部插入。
                e.next = newTable[i];
                newTable[i] = e;
                e = next;
            }
        }
    }

 /**
  * Inserts a new entry at the head of the given bucket and increments {@code size}.
  *
  * @param hash        hash to record on the entry
  * @param key         key of the mapping
  * @param value       value of the mapping
  * @param bucketIndex index of the bucket that receives the entry
  */
 void createEntry(int hash, K key, V value, int bucketIndex) {
        // The bucket's current head (possibly null) becomes the new entry's successor.
        table[bucketIndex] = new Entry<>(hash, key, value, table[bucketIndex]);
        ++size;
    }

/**
 * Computes the hash used to place {@code k} in the table.
 *
 * <p>When hashSeed is non-zero and the key is a String, the result comes from
 * sun.misc.Hashing.stringHash32. Otherwise the seed is XORed with
 * {@code k.hashCode()} and higher bits are folded into lower ones with shifts.
 *
 * @param k key to hash; callers in this file handle null keys before calling
 * @return the computed hash
 */
final int hash(Object k) {
        int seed = hashSeed;
        if (seed != 0 && k instanceof String) {
            return sun.misc.Hashing.stringHash32((String) k);
        }

        int mixed = seed ^ k.hashCode();

        // Fold the high bits downward so they influence the low bits that indexFor keeps.
        mixed = mixed ^ (mixed >>> 20) ^ (mixed >>> 12);
        return mixed ^ (mixed >>> 7) ^ (mixed >>> 4);
    }

    /**
     * Maps a hash to an array index by masking it with {@code length - 1}.
     *
     * @param h      hash value
     * @param length table length; the mask covers every index only when this is a power of two
     * @return {@code h} masked with {@code length - 1}
     */
    static int indexFor(int h, int length) {
        int mask = length - 1;
        return mask & h;
    }

HashMap 删除数据


 /**
  * Removes the mapping for {@code key}, if any.
  *
  * @param key key whose mapping should be removed
  * @return the value of the removed entry, or {@code null} when removeEntryForKey found none
  */
 public V remove(Object key) {
        Entry removed = removeEntryForKey(key);
        if (removed == null) {
            return null;
        }
        return removed.value;
    }

  /**
   * Unlinks and returns the entry stored for {@code key}.
   *
   * @param key key to look up; a null key uses hash 0
   * @return the removed entry, or {@code null} when the map is empty or no entry matches
   */
  final Entry removeEntryForKey(Object key) {
        if (size == 0) {
            return null;
        }

        int hash = (key == null) ? 0 : hash(key);
        int bucket = indexFor(hash, table.length);

        // previous stays null while current is still the head of the bucket's chain.
        Entry previous = null;
        for (Entry current = table[bucket]; current != null;
                previous = current, current = current.next) {
            if (current.hash != hash) {
                continue;
            }
            Object candidate = current.key;
            if (candidate == key || (key != null && key.equals(candidate))) {
                modCount++;
                size--;
                if (previous == null) {
                    // Removing the head: the bucket now starts at the successor.
                    table[bucket] = current.next;
                } else {
                    // Removing from the middle or tail: bypass the entry.
                    previous.next = current.next;
                }
                current.recordRemoval(this);
                return current;
            }
        }

        return null;
    }

HashMap的table为什么是transient的

// Bucket array. Declared transient; writeObject/readObject below write and re-insert
// the key/value pairs themselves instead of relying on default serialization.
transient Entry[] table;

看到table用了transient修饰，也就是说table里面的内容全都不会被序列化
因为HashMap是基于HashCode的，HashCode作为Object的方法，是native的
这意味着的是：HashCode和底层实现相关，不同的虚拟机可能有不同的HashCode算法。再进一步说得明白些就是，可能同一个Key在虚拟机A上的HashCode=1，在虚拟机B上的HashCode=2，在虚拟机C上的HashCode=3。

这就有问题了，Java自诞生以来，就以跨平台性作为最大卖点，好了，如果table不被transient修饰，在虚拟机A上可以用的程序到虚拟机B上就不能用了，失去了跨平台性，因为：

1、Key在虚拟机A上的HashCode=100，连在table[4]上

2、Key在虚拟机B上的HashCode=101，这样，就去table[5]上找Key，明显找不到

整个代码就出问题了。因此，为了避免这一点，Java采取了重写自己序列化table的方法，在writeObject选择将key和value追加到序列化的文件最后面

/**
 * Custom serialization: default fields first, then the bucket count, the number of
 * mappings, and finally each key followed by its value.
 *
 * @param s stream to write to
 * @throws IOException if writing to the stream fails
 */
private void writeObject(java.io.ObjectOutputStream s) throws IOException {
        // Non-transient fields (threshold, load factor, ...) go out via the default mechanism.
        s.defaultWriteObject();

        // Bucket count: derived from threshold when the table has not been allocated yet.
        int bucketCount = (table == EMPTY_TABLE)
                ? roundUpToPowerOf2(threshold)
                : table.length;
        s.writeInt(bucketCount);

        // Number of mappings.
        s.writeInt(size);

        if (size <= 0) {
            return;
        }

        // Keys and values, alternating.
        for (Map.Entry mapping : entrySet0()) {
            s.writeObject(mapping.getKey());
            s.writeObject(mapping.getValue());
        }
    }

/**
 * Custom deserialization: reads the default fields, skips the stored bucket count,
 * then re-inserts every key/value pair through putForCreate.
 *
 * @param s stream to read from
 * @throws IOException            if reading fails, including InvalidObjectException for
 *                                an illegal load factor or a negative mappings count
 * @throws ClassNotFoundException if the class of a serialized object cannot be found
 */
private void readObject(java.io.ObjectInputStream s)
        throws IOException, ClassNotFoundException {
        // Restores the non-transient fields; the threshold read here is not trusted as-is.
        s.defaultReadObject();
        if (Float.isNaN(loadFactor) || loadFactor <= 0) {
            throw new InvalidObjectException("Illegal load factor: " + loadFactor);
        }

        // Start from the empty table; buckets are rebuilt below.
        table = (Entry[]) EMPTY_TABLE;

        // Stored bucket count: consumed to keep the stream aligned, value ignored.
        s.readInt();

        int mappings = s.readInt();
        if (mappings < 0) {
            throw new InvalidObjectException("Illegal mappings count: " + mappings);
        }

        // Capacity is mappings scaled by 1 / loadFactor (scale capped at 4),
        // and never more than MAXIMUM_CAPACITY.
        float perMapping = Math.min(1 / loadFactor, 4.0f);
        int capacity = (int) Math.min(mappings * perMapping, HashMap.MAXIMUM_CAPACITY);

        // Allocate buckets only when there is something to insert.
        if (mappings > 0) {
            inflateTable(capacity);
        } else {
            threshold = capacity;
        }

        // Give subclasses a chance to initialise before entries are inserted.
        init();

        // Pairs were written key first, then value.
        for (int remaining = mappings; remaining > 0; remaining--) {
            K key = (K) s.readObject();
            V value = (V) s.readObject();
            putForCreate(key, value);
        }
    }