上一篇我们分析了Map接口以及AbstractMap提供的Map接口的默认实现,那么现在是时候揭开HashMap的神秘面纱了。HashMap只是Map接口的一种实现,Map接口比较知名的实现类还有TreeMap和LinkedHashMap,另外两个实现类我们后面再分析,现在主要看HashMap的底层源码实现。HashMap的主要特点就是它的查询非常高效,平均可以在O(1)时间内实现元素的查找、删除和插入操作,缺点就是无序,以及占用空间大。如果想要维持内部元素的顺序,使用HashMap肯定是不合适的。
那么HashMap到底是怎么实现的呢?它底层数据又是如何组织的呢?这里先给出结论:其实HashMap底层的数据组织方式就是一个可变长数组+单向链表(这里是基于JDK 1.7的版本,高版本有更高效的实现,有机会我们后面再分析其他版本的实现)。
好了现在开始阅读源码(先看主要的成员变量和构造函数):
public class HashMap
extends AbstractMap
implements Map, Cloneable, Serializable
{
// 默认初始化大小 16(2的幂次方) 后面可以看到HashMap内数组的大小一直是2的幂次方,为了效率
static final int DEFAULT_INITIAL_CAPACITY = 16;
// hash表的最大容量
static final int MAXIMUM_CAPACITY = 1 << 30;
// 默认的负载因子 等于hash表中实际储存的元素数量大小与hash表长度的比值:size / table.lebgth
static final float DEFAULT_LOAD_FACTOR = 0.75f;
// Entry数组,Entry是Map接口内部定义的一个接口,表示一个键值对。这个也被称为hash表
transient Entry[] table;
// 集合内元素的数量
transient int size;
// 阈值,当集合内的元素数量(size)大于阈值时,进行rehash(相当于ArrayList中扩容)。该值等于 table.length * loadFactor
int threshold;
// 负载因子
final float loadFactor;
// 修改次数
transient int modCount;
public HashMap(int initialCapacity, float loadFactor) {
if (initialCapacity < 0)
throw new IllegalArgumentException("Illegal initial capacity: " +
initialCapacity);
if (initialCapacity > MAXIMUM_CAPACITY)
initialCapacity = MAXIMUM_CAPACITY;
if (loadFactor <= 0 || Float.isNaN(loadFactor))
throw new IllegalArgumentException("Illegal load factor: " +
loadFactor);
// 找到比initialCapacity大的2次幂,所以即使指定初始化大小,底层分别的实际大小可能不一样
int capacity = 1;
while (capacity < initialCapacity)
capacity <<= 1;
this.loadFactor = loadFactor;
threshold = (int)Math.min(capacity * loadFactor, MAXIMUM_CAPACITY + 1);
table = new Entry[capacity];
init(); // 初始化函数,在HashMap里面为空,子类如果有初始化的资源可以重写这个函数
}
void init() {
}
public HashMap(int initialCapacity) {
this(initialCapacity, DEFAULT_LOAD_FACTOR);
}
public HashMap() {
this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR);
}
public HashMap(Map extends K, ? extends V> m) {
this(Math.max((int) (m.size() / DEFAULT_LOAD_FACTOR) + 1,
DEFAULT_INITIAL_CAPACITY), DEFAULT_LOAD_FACTOR);
putAllForCreate(m);
}
// 循环调用putForCreate添加m中的所有元素
private void putAllForCreate(Map extends K, ? extends V> m) {
for (Map.Entry extends K, ? extends V> e : m.entrySet())
putForCreate(e.getKey(), e.getValue());
}
private void putForCreate(K key, V value) {
int hash = null == key ? 0 : hash(key); // hash(key) 计算hash值
int i = indexFor(hash, table.length); // 找到应该储存在数组中的下标 ,取模操作(由于table.length是2的幂次方,可以使用位运算来代替)
// 判断key是否已经在hash表中了,如果是,替换相对于的值
for (Entry e = table[i]; e != null; e = e.next) {
Object k;
if (e.hash == hash && // 先比较hash值,整数的比较比较快。如果两个对象的hash值不一样,那么他们肯定不相等。反之,不成立。
((k = e.key) == key || (key != null && key.equals(k)))) {
e.value = value;
return;
}
}
createEntry(hash, key, value, i);
}
// 使用头插法,将新节点插入到头部,后面我们会看到里层的进一步源码
void createEntry(int hash, K key, V value, int bucketIndex) {
Entry e = table[bucketIndex];
table[bucketIndex] = new Entry<>(hash, key, value, e);
size++;
}
}
来看结点Entry的实现:
static class Entry implements Map.Entry {
final K key; // 使用final修饰,key不可变了,是一个常量
V value;
Entry next; // 单链表实现
int hash; // 保存hash值是为了比较时更加快速,整数的比较更快
// 构造函数
Entry(int h, K k, V v, Entry n) {
value = v;
next = n;
key = k;
hash = h;
}
public final K getKey() {
return key;
}
public final V getValue() {
return value;
}
public final V setValue(V newValue) {
V oldValue = value;
value = newValue;
return oldValue;
}
public final boolean equals(Object o) {
if (!(o instanceof Map.Entry))
return false;
Map.Entry e = (Map.Entry)o;
Object k1 = getKey();
Object k2 = e.getKey();
if (k1 == k2 || (k1 != null && k1.equals(k2))) {
Object v1 = getValue();
Object v2 = e.getValue();
if (v1 == v2 || (v1 != null && v1.equals(v2)))
return true;
}
return false;
}
// 一个Entry的hash值等于key的hash与value的hash的异或
public final int hashCode() {
return (key==null ? 0 : key.hashCode()) ^
(value==null ? 0 : value.hashCode());
}
public final String toString() {
return getKey() + "=" + getValue();
}
// 空函数,应该是为了以后扩展吧
void recordAccess(HashMap m) {
}
// 空函数,应该是为了以后扩展吧
void recordRemoval(HashMap m) {
}
}
Entry是Map接口中定义的内部接口,封装了一组键值对。在HashMap中通过一个静态内部类实现了这个接口。从上面Entry的实现方式可以看出,Entry实际上就是单向链表的一个结点(有一个指向后继结点的指针next)。现在我们应该就能理解开头说的那句话了:HashMap底层的数据组织方式就是一个可变长数组(table数组,类型为Entry[])+单向链表(数组的每个元素指向一条由Entry结点串成的链表)。
接着看一些简单的public函数:
/**
 * Supplemental hash: mixes the higher bits of {@code k.hashCode()} into the
 * lower ones so that the low bits used for bucket selection are better spread.
 * The caller must pass a non-null key.
 */
final int hash(Object k) {
    final int raw = k.hashCode();

    // This function ensures that hashCodes that differ only by
    // constant multiples at each bit position have a bounded
    // number of collisions (approximately 8 at default load factor).
    final int folded = raw ^ (raw >>> 20) ^ (raw >>> 12);
    return folded ^ (folded >>> 7) ^ (folded >>> 4);
}
/**
 * Maps a hash to a bucket index by masking with {@code length - 1}.
 * Equivalent to a non-negative modulo only when {@code length} is a power of two.
 */
static int indexFor(int h, int length) {
    final int mask = length - 1;
    return h & mask;
}
/** Returns the value of the {@code size} counter. */
public int size() {
    return this.size;
}
/** Returns {@code true} when the {@code size} counter is zero. */
public boolean isEmpty() {
    return 0 == size;
}
/**
 * Looks up the value mapped to {@code key}.
 *
 * @param key key to search for; {@code null} is permitted and handled separately
 * @return the mapped value, or {@code null} when no entry is found
 */
public V get(Object key) {
    if (key == null)
        return getForNullKey();
    Entry<K,V> entry = getEntry(key);

    return null == entry ? null : entry.getValue();
}
/**
 * Lookup for the {@code null} key. A null key is given hash 0 and therefore
 * lives in bucket 0; the converse does not hold (other keys may also land in
 * bucket 0), so each node's key is still checked.
 *
 * @return the value stored under the null key, or {@code null} if absent
 */
private V getForNullKey() {
    for (Entry<K,V> e = table[0]; e != null; e = e.next) {
        if (e.key == null)
            return e.value;
    }
    return null;
}
/** Returns {@code true} when {@code getEntry} finds an entry for {@code key}. */
public boolean containsKey(Object key) {
    return null != getEntry(key);
}
/**
 * Finds the entry whose key matches {@code key}.
 *
 * @param key key to search for; {@code null} is treated as hash 0
 * @return the matching entry, or {@code null} if the bucket chain holds none
 */
final Entry<K,V> getEntry(Object key) {
    int hash = (key == null) ? 0 : hash(key);
    for (Entry<K,V> e = table[indexFor(hash, table.length)];
         e != null;
         e = e.next) {
        Object k;
        // Compare cached hashes first, then identity, then equals().
        if (e.hash == hash &&
            ((k = e.key) == key || (key != null && key.equals(k))))
            return e;
    }
    return null;
}
/**
 * Removes the mapping for {@code key}.
 *
 * @return the value that was mapped, or {@code null} when the table held no such key
 */
public V remove(Object key) {
    Entry<K,V> e = removeEntryForKey(key);
    return (e == null ? null : e.value);
}
/**
 * Unlinks and returns the entry stored under {@code key}.
 *
 * @param key key to remove; {@code null} is treated as hash 0
 * @return the removed entry, or {@code null} if the chain contained no match
 */
final Entry<K,V> removeEntryForKey(Object key) {
    int hash = (key == null) ? 0 : hash(key);
    int i = indexFor(hash, table.length);
    Entry<K,V> prev = table[i];
    Entry<K,V> e = prev;

    while (e != null) {
        Entry<K,V> next = e.next;
        Object k;
        if (e.hash == hash &&
            ((k = e.key) == key || (key != null && key.equals(k)))) {
            modCount++;
            size--;
            if (prev == e)
                table[i] = next;      // removing the head: the bucket now starts at its successor
            else
                prev.next = next;     // removing an inner node: bypass it from its predecessor
            e.recordRemoval(this);
            return e;
        }
        prev = e;
        e = next;
    }

    // Reached only when the chain is exhausted, i.e. e is null here.
    return e;
}
/** Empties the map: bumps modCount, nulls out every bucket slot and resets size to 0. */
public void clear() {
    modCount++;
    Entry[] buckets = table;
    int slot = 0;
    while (slot < buckets.length) {
        buckets[slot++] = null;
    }
    size = 0;
}
/**
 * Reports whether any entry holds {@code value}. Walks every bucket and every
 * chain node, so the cost grows with table length plus entry count.
 */
public boolean containsValue(Object value) {
    if (value == null) {
        return containsNullValue();
    }

    for (Entry bucketHead : table) {
        for (Entry node = bucketHead; node != null; node = node.next) {
            if (value.equals(node.value)) {
                return true;
            }
        }
    }
    return false;
}
/** Variant of containsValue for a {@code null} argument: scans all chains for a null value. */
private boolean containsNullValue() {
    for (Entry bucketHead : table) {
        for (Entry node = bucketHead; node != null; node = node.next) {
            if (node.value == null) {
                return true;
            }
        }
    }
    return false;
}
接下来我们来看向HashMap中添加元素时经历了哪些步骤:
/**
 * Associates {@code value} with {@code key}.
 *
 * @param key   key to store; {@code null} is delegated to {@code putForNullKey}
 * @param value value to associate with the key
 * @return the previous value if the key was already present (its value is
 *         overwritten), otherwise {@code null}
 */
public V put(K key, V value) {
    if (key == null)
        return putForNullKey(value);
    int hash = hash(key);
    int i = indexFor(hash, table.length);
    for (Entry<K,V> e = table[i]; e != null; e = e.next) {
        Object k;
        // Key already present: replace the value; modCount is left unchanged.
        if (e.hash == hash && ((k = e.key) == key || key.equals(k))) {
            V oldValue = e.value;
            e.value = value;
            e.recordAccess(this);
            return oldValue;
        }
    }

    // New key: this is a structural modification.
    modCount++;
    addEntry(hash, key, value, i);
    return null;
}
/**
 * Stores {@code value} under the {@code null} key, which uses hash 0 and
 * therefore bucket 0.
 *
 * @return the previous value for the null key, or {@code null} if there was none
 */
private V putForNullKey(V value) {
    for (Entry<K,V> e = table[0]; e != null; e = e.next) {
        if (e.key == null) {
            V oldValue = e.value;
            e.value = value;
            e.recordAccess(this);
            return oldValue;
        }
    }
    modCount++;
    addEntry(0, null, value, 0);
    return null;
}
/**
 * Adds a new entry, first doubling the table when size has reached the
 * threshold and the target bucket is already occupied. After a resize the
 * hash and the bucket index are recomputed against the new table length.
 */
void addEntry(int hash, K key, V value, int bucketIndex) {
    final boolean thresholdReached = size >= threshold;
    if (thresholdReached && table[bucketIndex] != null) {
        resize(table.length * 2);
        if (key == null) {
            hash = 0;
        } else {
            hash = hash(key);
        }
        bucketIndex = indexFor(hash, table.length);
    }

    createEntry(hash, key, value, bucketIndex);
}
/** Head-inserts a new node into bucket {@code bucketIndex}; the old head becomes its successor. */
void createEntry(int hash, K key, V value, int bucketIndex) {
    table[bucketIndex] = new Entry<>(hash, key, value, table[bucketIndex]);
    size++;
}
/**
 * Moves all entries into a new table of length {@code newCapacity} and
 * recomputes the threshold. If the table is already at MAXIMUM_CAPACITY, only
 * the threshold is raised to Integer.MAX_VALUE and nothing is moved.
 */
void resize(int newCapacity) {
    final int currentCapacity = table.length;
    if (currentCapacity == MAXIMUM_CAPACITY) {
        threshold = Integer.MAX_VALUE;
        return;
    }

    Entry[] grown = new Entry[newCapacity];

    // Entries need their hash recomputed only when the alt-hashing flag flips during this resize.
    final boolean altHashingBefore = useAltHashing;
    useAltHashing |= sun.misc.VM.isBooted() &&
            (newCapacity >= Holder.ALTERNATIVE_HASHING_THRESHOLD);
    final boolean hashingModeChanged = altHashingBefore != useAltHashing;

    transfer(grown, hashingModeChanged);
    table = grown;
    threshold = (int) Math.min(newCapacity * loadFactor, MAXIMUM_CAPACITY + 1);
}
/**
 * Relinks every entry of the current table into {@code newTable}.
 * Nodes are head-inserted, so each chain ends up in reverse order.
 *
 * @param newTable destination table
 * @param rehash   when true, each entry's cached hash is recomputed first;
 *                 resize passes true only if the alt-hashing flag changed
 */
void transfer(Entry[] newTable, boolean rehash) {
    final int targetLength = newTable.length;
    for (Entry bucketHead : table) {
        Entry node = bucketHead;
        while (node != null) {
            // Save the successor before node.next is overwritten below.
            final Entry following = node.next;
            if (rehash) {
                node.hash = (node.key == null) ? 0 : hash(node.key);
            }
            final int slot = indexFor(node.hash, targetLength);
            node.next = newTable[slot];
            newTable[slot] = node;
            node = following;
        }
    }
}
// 将集合m中的所有元素都增加进来
public void putAll(Map extends K, ? extends V> m) {
int numKeysToBeAdded = m.size();
if (numKeysToBeAdded == 0)
return;
/*
* Expand the map if the map if the number of mappings to be added
* is greater than or equal to threshold. This is conservative; the
* obvious condition is (m.size() + size) >= threshold, but this
* condition could result in a map with twice the appropriate capacity,
* if the keys to be added overlap with the keys already in this map.
* By using the conservative calculation, we subject ourself
* to at most one extra resize.
*/
if (numKeysToBeAdded > threshold) {
int targetCapacity = (int)(numKeysToBeAdded / loadFactor + 1);
if (targetCapacity > MAXIMUM_CAPACITY)
targetCapacity = MAXIMUM_CAPACITY;
int newCapacity = table.length;
while (newCapacity < targetCapacity)
newCapacity <<= 1;
if (newCapacity > table.length)
resize(newCapacity);
}
for (Map.Entry extends K, ? extends V> e : m.entrySet())
put(e.getKey(), e.getValue());
}
以上,就是保存键值对的主要代码,简单总结一下,基本步骤为:
计算键的哈希值
根据哈希值得到保存位置(取模)
插到对应位置的链表头部或更新已有值
根据需要扩展table大小
以上描述可能比较抽象,我们通过一个例子,用图示的方式,再来看下,代码是:
// Example map from String keys to Integer values, used for the walkthrough that follows.
Map<String, Integer> countMap = new HashMap<>();
countMap.put("hello", 1);
countMap.put("world", 3);
countMap.put("position", 4);
在通过new HashMap()创建一个对象后,内存中的图示结构大概是:
接下来执行
// First insertion of the example: maps "hello" to 1.
countMap.put("hello", 1);
"hello"的hash值为96207088,模16的结果为0,所以插入table[0]指向的链表头部,内存结构会变为:
"world"的hash值为111207038,模16结果为14(111207038 & 15 = 14),所以保存完"world"后,内存结构会变为:
"position"的hash值为771782464,模16结果也为0,table[0]已经有节点了,新节点会插到链表头部,内存结构会变为:
最后一点内容就是关于keySet,EntrySet,values的内容了,看源码(可能有点繁琐,但是不难):
/**
 * Shared base of the key, value and entry iterators: walks the table bucket by
 * bucket and each chain node by node. Throws ConcurrentModificationException
 * when modCount no longer matches the value captured by the iterator.
 *
 * @param <E> element type produced by the concrete iterator
 */
private abstract class HashIterator<E> implements Iterator<E> {
    Entry<K,V> next;        // next entry to return
    int expectedModCount;   // For fast-fail
    int index;              // current slot
    Entry<K,V> current;     // current entry

    HashIterator() {
        expectedModCount = modCount;
        if (size > 0) { // advance to the first non-empty bucket
            Entry<K,V>[] t = table;
            while (index < t.length && (next = t[index++]) == null)
                ;
        }
    }

    public final boolean hasNext() {
        return next != null;
    }

    /**
     * Returns the pending entry and pre-fetches the following one.
     *
     * @throws ConcurrentModificationException if modCount differs from expectedModCount
     * @throws NoSuchElementException          if the iteration is exhausted
     */
    final Entry<K,V> nextEntry() {
        if (modCount != expectedModCount)
            throw new ConcurrentModificationException();
        Entry<K,V> e = next;
        if (e == null)
            throw new NoSuchElementException();

        // End of this chain: move on to the next non-empty bucket.
        if ((next = e.next) == null) {
            Entry<K,V>[] t = table;
            while (index < t.length && (next = t[index++]) == null)
                ;
        }
        current = e;
        return e;
    }

    /**
     * Removes the entry most recently returned by this iterator.
     *
     * @throws IllegalStateException           if no entry has been returned yet,
     *                                         or it was already removed
     * @throws ConcurrentModificationException if modCount differs from expectedModCount
     */
    public void remove() {
        if (current == null)
            throw new IllegalStateException();
        if (modCount != expectedModCount)
            throw new ConcurrentModificationException();
        Object k = current.key;
        current = null;
        HashMap.this.removeEntryForKey(k);
        // Re-sync so that this iterator's own removal is not reported as a conflict.
        expectedModCount = modCount;
    }
}
/** Iterator that yields the value of each entry. */
private final class ValueIterator extends HashIterator<V> {
    public V next() {
        return nextEntry().value;
    }
}
/** Iterator that yields the key of each entry. */
private final class KeyIterator extends HashIterator<K> {
    public K next() {
        return nextEntry().getKey();
    }
}
private final class EntryIterator extends HashIterator> {
public Map.Entry next() {
return nextEntry();
}
}
/** Creates a fresh key iterator; used by the key-set view. */
Iterator<K> newKeyIterator() {
    return new KeyIterator();
}
/** Creates a fresh value iterator; used by the values view. */
Iterator<V> newValueIterator() {
    return new ValueIterator();
}
Iterator> newEntryIterator() {
return new EntryIterator();
}
/** Returns the key-set view, creating and caching it in {@code keySet} on first use. */
public Set<K> keySet() {
    Set<K> ks = keySet;
    return (ks != null ? ks : (keySet = new KeySet()));
}
/**
 * View over the map's keys: it stores nothing itself, every operation is
 * forwarded to the enclosing map.
 */
private final class KeySet extends AbstractSet<K> {
    public Iterator<K> iterator() {
        return newKeyIterator();
    }
    public int size() {
        return size;
    }
    public boolean contains(Object o) {
        return containsKey(o);
    }
    /** Removes the mapping for {@code o} from the enclosing map; true if one was removed. */
    public boolean remove(Object o) {
        return HashMap.this.removeEntryForKey(o) != null;
    }
    public void clear() {
        HashMap.this.clear();
    }
}
/** Returns the values view, creating and caching it in {@code values} on first use. */
public Collection<V> values() {
    Collection<V> vs = values;
    return (vs != null ? vs : (values = new Values()));
}
/**
 * View over the map's values: it stores nothing itself, every operation is
 * forwarded to the enclosing map.
 */
private final class Values extends AbstractCollection<V> {
    public Iterator<V> iterator() {
        return newValueIterator();
    }
    public int size() {
        return size;
    }
    public boolean contains(Object o) {
        return containsValue(o);
    }
    public void clear() {
        HashMap.this.clear();
    }
}
private transient Set> entrySet = null;
public Set> entrySet() {
return entrySet0();
}
private Set> entrySet0() {
Set> es = entrySet;
return es != null ? es : (entrySet = new EntrySet());
}
// entry 返回的是视图
private final class EntrySet extends AbstractSet> {
public Iterator> iterator() {
return newEntryIterator();
}
public boolean contains(Object o) {
if (!(o instanceof Map.Entry))
return false;
Map.Entry e = (Map.Entry) o;
Entry candidate = getEntry(e.getKey());
return candidate != null && candidate.equals(e);
}
public boolean remove(Object o) {
return removeMapping(o) != null;
}
public int size() {
return size;
}
public void clear() {
HashMap.this.clear();
}
}
好了HashMap的源码就分析完毕了,其实根本就不难,大家不要害怕阅读源码。
参考:Java编程的逻辑