本章, 我们来看下java.util.HashMap
.
从图中可以看出, 其继承关系为: Map <-- AbstractMap <-- HashMap (注意: Map 是独立的顶层接口, 并不继承 Collection)
.
private static final long serialVersionUID = 362498820763181265L;
static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16
static final int MAXIMUM_CAPACITY = 1 << 30;
// 扩充因子
static final float DEFAULT_LOAD_FACTOR = 0.75f;
// 单槽内多于这个数字阈值转换为树
static final int TREEIFY_THRESHOLD = 8;
// 单槽内小于这个数字阈值转换为链表
static final int UNTREEIFY_THRESHOLD = 6;
// 总体转变为树的CAPACITY 阈值
static final int MIN_TREEIFY_CAPACITY = 64;
transient Node[] table;
transient Set> entrySet;
// 整个HashMap长度
transient int size;
// 修改次数
transient int modCount;
// 阈值(超过此值 即扩充threshold=loadFactor*CAPACITY)
int threshold;
final float loadFactor;
public HashMap()
/public HashMap(int initialCapacity)
/public HashMap(int initialCapacity, float loadFactor)
/public HashMap(Map<? extends K, ? extends V> m)
put
public V remove(Object key)
public V get(Object key)
/public V put(K key, V value)
forEach
/ Iterator
public int size()
public boolean isEmpty()
public boolean containsKey(Object key)
public Set<K> keySet()
public Set<Map.Entry<K,V>> entrySet()
值得注意的是:
put
KeyIterator
/ ValueIterator
/ EntryIterator
.Node
static class Node implements Map.Entry {
final int hash;
final K key;
V value;
Node next;
Node(int hash, K key, V value, Node next) {
this.hash = hash;
this.key = key;
this.value = value;
this.next = next;
}
public final K getKey() { return key; }
public final V getValue() { return value; }
public final String toString() { return key + "=" + value; }
public final int hashCode() {
return Objects.hashCode(key) ^ Objects.hashCode(value);
}
public final V setValue(V newValue) {
V oldValue = value;
value = newValue;
return oldValue;
}
// 2个Node节点相等逻辑如下:
// 1. this 下标地址相等
//
public final boolean equals(Object o) {
if (o == this)
return true;
if (o instanceof Map.Entry) {
Map.Entry,?> e = (Map.Entry,?>)o;
if (Objects.equals(key, e.getKey()) &&
Objects.equals(value, e.getValue()))
return true;
}
return false;
}
}
public class HashMap<K,V> extends AbstractMap<K,V>
    implements Map<K,V>, Cloneable, Serializable {
可见. 除了实现Cloneable
和Serializable
接口外. 还继承了AbstractMap
抽象类.
我们先看下Map
接口.
public interface Map {
int size();
boolean isEmpty();
boolean containsKey(Object key);
boolean containsValue(Object value);
V get(Object key);
V put(K key, V value);
V remove(Object key);
void putAll(Map extends K, ? extends V> m);
void clear();
Set keySet();
Collection values();
Set> entrySet();
boolean equals(Object o);
int hashCode();
default V getOrDefault(Object key, V defaultValue) {
default void forEach(BiConsumer super K, ? super V> action) {
default void replaceAll(BiFunction super K, ? super V, ? extends V> function) {
default V putIfAbsent(K key, V value) {
default boolean remove(Object key, Object value) {
default boolean replace(K key, V oldValue, V newValue) {
default V replace(K key, V value) {
default V computeIfAbsent(K key,
Function super K, ? extends V> mappingFunction) {
default V computeIfPresent(K key,
BiFunction super K, ? super V, ? extends V> remappingFunction) {
default V compute(K key,
BiFunction super K, ? super V, ? extends V> remappingFunction) {
default V merge(K key, V value,
BiFunction super V, ? super V, ? extends V> remappingFunction) {
}
其中Map
接口内还维护一个Entry
接口.
interface Entry {
K getKey();
V getValue();
V setValue(V value);
boolean equals(Object o);
int hashCode();
public static , V> Comparator> comparingByKey() {
public static > Comparator> comparingByValue() {
public static Comparator> comparingByKey(Comparator super K> cmp) {
public static Comparator> comparingByValue(Comparator super V> cmp) {
}
HashMap
的构造函数一共有3种:
public HashMap()
无参构造public HashMap(int initialCapacity)
带初始长度构造public HashMap(int initialCapacity, float loadFactor)
带初始长度和扩充因子构造 public HashMap() {
    // No-arg constructor: only the load factor is assigned here. No table is
    // allocated and threshold keeps its default value of 0.
    this.loadFactor = DEFAULT_LOAD_FACTOR; // all other fields defaulted
}
/**
 * Creates an empty map with the requested initial capacity and the default
 * load factor, by delegating to the two-argument constructor.
 *
 * @param initialCapacity the requested initial capacity
 */
public HashMap(int initialCapacity) {
    this(initialCapacity, DEFAULT_LOAD_FACTOR);
}
public HashMap(int initialCapacity, float loadFactor) {
if (initialCapacity < 0)
throw new IllegalArgumentException("Illegal initial capacity: " +
initialCapacity);
if (initialCapacity > MAXIMUM_CAPACITY)
initialCapacity = MAXIMUM_CAPACITY;
if (loadFactor <= 0 || Float.isNaN(loadFactor))
throw new IllegalArgumentException("Illegal load factor: " +
loadFactor);
this.loadFactor = loadFactor;
this.threshold = tableSizeFor(initialCapacity);
}
我们可以看下tableSizeFor
方法
/**
 * Returns the smallest power of two that is not less than {@code cap}, bounded
 * below by 1 and above by MAXIMUM_CAPACITY.
 *
 * @param cap the requested capacity
 * @return a power-of-two table size
 */
static final int tableSizeFor(int cap) {
    // Start from cap - 1 so that an exact power of two maps to itself.
    int n = cap - 1;

    // Copy the highest set bit into every lower position; n ends up as 2^k - 1.
    n |= n >>> 1;
    n |= n >>> 2;
    n |= n >>> 4;
    n |= n >>> 8;
    n |= n >>> 16;

    if (n < 0) {
        // cap was 0 or negative.
        return 1;
    }
    if (n >= MAXIMUM_CAPACITY) {
        return MAXIMUM_CAPACITY;
    }
    return n + 1;
}
这个方法的作用是: 返回不小于传入值 x 的最小的 2^N 值.
比如: 传递7
, 那么返回值为8
. 传递15
, 返回值为16
, 传递16
, 返回值为16
.
这个方法确保当前的HashMap的数组长度为2^N
. 这样做的好处是: 下标可以用位运算 hash & (length-1) 快速算出(等价于取模), 并且配合 hash()
的高位扰动, 让元素在各个槽位中分布得更均匀, 减少冲突.
这样可能看起来比较抽象, 那么举个例子好了:
我们以 n = 10 为例 (注意 n 是 cap - 1 之后的值, 即传入的 cap 为 11), 也就是二进制 1010, 转换为32位为:
00000000 00000000 00000000 00001010
随后, 先无符号右移1位, 再与原值做按位或(|)运算.
n |= n >>> 1;
00000000 00000000 00000000 00001010
00000000 00000000 00000000 00000101
--------------------------------------------------------
00000000 00000000 00000000 00001111
其实此次操作. 只要保证最高位和高位第二位都为1即可. 也就是 第4和第3位.
随后, 再进行移位操作(移动2位/移动4位/移动8位/移动16位). 将其位数都置为1. 即 `2^N-1`
然后再+1. 即可获得 “不小于x”的 ` 2^N 值`.
为什么需要指定数组长度为
2^N
? 这与HashCode值有什么关系? 如何保证命中率? 我们下文再说.
此外还有一个特别的构造函数public HashMap(Map<? extends K, ? extends V> m)
. 这个我们放到后面新增的过程再说.
/**
 * Creates a map with the default load factor and copies every mapping of
 * {@code m} into it through putMapEntries.
 *
 * @param m the map whose mappings are copied
 * @throws NullPointerException if m is null (putMapEntries calls m.size())
 */
public HashMap(Map<? extends K, ? extends V> m) {
    this.loadFactor = DEFAULT_LOAD_FACTOR;
    putMapEntries(m, false);
}
final void putMapEntries(Map extends K, ? extends V> m, boolean evict) {
int s = m.size();
if (s > 0) {
if (table == null) { // pre-size
float ft = ((float)s / loadFactor) + 1.0F;
int t = ((ft < (float)MAXIMUM_CAPACITY) ?
(int)ft : MAXIMUM_CAPACITY);
if (t > threshold)
threshold = tableSizeFor(t);
}
else if (s > threshold)
resize();
for (Map.Entry extends K, ? extends V> e : m.entrySet()) {
K key = e.getKey();
V value = e.getValue();
putVal(hash(key), key, value, false, evict);
}
}
}
/**
 * Computes the spread hash for a key: the key's hashCode with its upper 16 bits
 * XORed into the lower 16 bits. A null key hashes to 0.
 *
 * @param key the key, may be null
 * @return the spread hash
 */
static final int hash(Object key) {
    if (key == null) {
        return 0;
    }
    int h;
    // The high half participates in the low bits that the index mask keeps.
    return (h = key.hashCode()) ^ (h >>> 16);
}
在看下新增方法之前. 我们先看下hash()
方法.
为什么使用h=key.hashCode()^(h>>>16)
来计算hash
值?
hash
散列之前. 我们先看下index
定位函数.hashCode % length
, 当length
为2^N
时. hashCode%length
可以简化为hashCode & (length-1)
. 这也不难理解. 我们举一个例子.例子1:
length = 4 hashCode为7. ok. 全部转变为二进制数为如下所示:
hashCode = 0111
length = 0100
---------------------------
取模 为 0011 即为3.
例子2:
length=4 hashCode=103
hashCode = 0110 0111
length= 0000 0100
---------------------------
取模 为 0000 0011 即为3.
仔细查看不难发现规律. 当length为2^N时, 取模的结果就是hashCode的低N位. 所以有做法: 将length变为length-1(低N位全为1), 然后做按位与(&)运算. 即hashCode & (length-1)
. 注意, 这里的length
需要保持2^N
, 以方便完成这个计算.
为什么使用
h=key.hashCode()^(h>>>16)
来计算hash
值?
- 根据上面的计算规则来发现. index下标与
length
的N
息息相关. 但是当hashCode
的后N
位确定后, 其下标的Index
位置也跟随确定. 为了加深随机性, 让数据更加分散, 所以有了^
异或运算.为什么取16?
- 因为大部分情况下, HashMap的长度是低于
2^16
的. (我们经常处理的数据集长度达不到那么多)
- hashCode 返回的是一个
int
类型的值(32位). 右移16位正好把高16位与低16位混合, 取一半也是常见的扰动策略.
Reference
[1]. HashMap中hash(Object key)原理,为什么(hashcode >>> 16)。
[2]. 运算符^(异或)
假设您简单地使用以下方法计算哈希表的索引:
int index = hashcode % table.length;
在某些常见用例中, 这可能导致许多冲突. 例如, 假设table.length是2的较小次幂(例如32或64). 在这种情况下, 只有哈希码的低位决定索引. 如果多个对象的哈希码仅在高位不同, 就会产生很多冲突. 通过移位, 哈希码的高位也能影响计算出的索引.
为什么返回除key.hashcode之外的(h = key.hashCode())^(h >>> 16)?
put
/**
 * Associates value with key. Delegates to putVal with onlyIfAbsent = false
 * (an existing value is overwritten) and evict = true.
 *
 * @param key   the key
 * @param value the value to store
 * @return whatever putVal returns: the previous value, or null if there was none
 */
public V put(K key, V value) {
    return putVal(hash(key), key, value, false, true);
}
对于HashMap
来说, 新增和修改都是使用put
方法.
putVal
/**
 * Shared implementation behind put: inserts a new mapping, or updates the
 * value of an existing one.
 *
 * @param hash         the spread hash of the key (see hash(Object))
 * @param key          the key
 * @param value        the value to store
 * @param onlyIfAbsent if true, an existing non-null value is left unchanged
 * @param evict        forwarded unchanged to afterNodeInsertion
 * @return the previous value (which may be null), or null if the key had no mapping
 */
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
               boolean evict) {
    Node<K,V>[] tab; Node<K,V> p; int n, i;
    // Table not allocated yet, or of length 0: allocate it through resize().
    if ((tab = table) == null || (n = tab.length) == 0)
        n = (tab = resize()).length;
    // The target bin is empty: the new node becomes its first element.
    if ((p = tab[i = (n - 1) & hash]) == null)
        tab[i] = newNode(hash, key, value, null);
    else {
        Node<K,V> e; K k;
        // The first node of the bin already holds this key: update it below.
        if (p.hash == hash &&
            ((k = p.key) == key || (key != null && key.equals(k))))
            e = p;
        // The bin is a tree: delegate the insert-or-find to the tree code.
        else if (p instanceof TreeNode)
            e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
        else {
            // Linked-list bin: walk it until the key or the tail is found.
            for (int binCount = 0; ; ++binCount) {
                if ((e = p.next) == null) {
                    // Tail reached without a match: append the new node.
                    p.next = newNode(hash, key, value, null);
                    // binCount skips the first node, so this fires when the bin
                    // already held TREEIFY_THRESHOLD nodes before the append.
                    if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                        treeifyBin(tab, hash);
                    break;
                }
                // A later node holds this key: stop and update it below.
                if (e.hash == hash &&
                    ((k = e.key) == key || (key != null && key.equals(k))))
                    break;
                p = e;
            }
        }
        // A node for this key already existed: replace its value, not a structural change.
        if (e != null) { // existing mapping for key
            V oldValue = e.value;
            if (!onlyIfAbsent || oldValue == null)
                e.value = value;
            afterNodeAccess(e);
            return oldValue;
        }
    }
    // A new node was added: record the structural modification.
    ++modCount;
    // Grow the table once the size exceeds the threshold.
    if (++size > threshold)
        resize();
    afterNodeInsertion(evict);
    return null;
}
putVal
是其新增方法的具体实现. 我们可以仔细看下这个方法.
if ((tab = table) == null || (n = tab.length) == 0)
if ((p = tab[i = (n - 1) & hash]) == null)
if (p.hash == hash &&((k = p.key) == key || (key != null && key.equals(k))))
else if (p instanceof TreeNode)
else {
遍历链表并在尾部插入; 当该槽位链表的节点数达到 TREEIFY_THRESHOLD = 8
时, 调用 treeifyBin 尝试转变为红黑树. 更新的逻辑大致如上所示.
resize
扩容
/**
 * Allocates the table on first use, or doubles its capacity, and moves the
 * existing nodes into the new table.
 *
 * @return the table in use after the call (the old one if already at MAXIMUM_CAPACITY)
 */
final Node<K,V>[] resize() {
    Node<K,V>[] oldTab = table;
    int oldCap = (oldTab == null) ? 0 : oldTab.length;
    int oldThr = threshold;
    int newCap, newThr = 0;

    // Part 1: work out the new capacity and the new threshold.
    if (oldCap > 0) {
        if (oldCap >= MAXIMUM_CAPACITY) {
            // Already at the maximum capacity: stop resizing from now on.
            threshold = Integer.MAX_VALUE;
            return oldTab;
        }
        else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
                 oldCap >= DEFAULT_INITIAL_CAPACITY)
            newThr = oldThr << 1; // double threshold
    }
    // First allocation; a constructor stored the capacity in threshold.
    else if (oldThr > 0) // initial capacity was placed in threshold
        newCap = oldThr;
    else {               // zero initial threshold signifies using defaults
        newCap = DEFAULT_INITIAL_CAPACITY;
        newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
    }
    // Threshold not set by any branch above: derive it from capacity * loadFactor.
    if (newThr == 0) {
        float ft = (float)newCap * loadFactor;
        newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
                  (int)ft : Integer.MAX_VALUE);
    }
    threshold = newThr;

    // Part 2: allocate the new table and move every bin across.
    @SuppressWarnings({"rawtypes","unchecked"})
    Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
    table = newTab;
    if (oldTab != null) {
        for (int j = 0; j < oldCap; ++j) {
            Node<K,V> e;
            if ((e = oldTab[j]) != null) {
                // Clear the old slot so the old table no longer references the nodes.
                oldTab[j] = null;
                // Single node: recompute its index directly.
                if (e.next == null)
                    newTab[e.hash & (newCap - 1)] = e;
                // Tree bin: the tree code splits it.
                else if (e instanceof TreeNode)
                    ((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
                // List bin: split into a "lo" and a "hi" list, keeping relative order.
                else { // preserve order
                    Node<K,V> loHead = null, loTail = null;
                    Node<K,V> hiHead = null, hiTail = null;
                    Node<K,V> next;
                    do {
                        next = e.next;
                        // The bit selected by oldCap decides whether the node
                        // stays at index j or moves to index j + oldCap.
                        if ((e.hash & oldCap) == 0) {
                            if (loTail == null)
                                loHead = e;
                            else
                                loTail.next = e;
                            loTail = e;
                        }
                        else {
                            if (hiTail == null)
                                hiHead = e;
                            else
                                hiTail.next = e;
                            hiTail = e;
                        }
                    } while ((e = next) != null);
                    if (loTail != null) {
                        loTail.next = null;
                        newTab[j] = loHead;
                    }
                    if (hiTail != null) {
                        hiTail.next = null;
                        newTab[j + oldCap] = hiHead;
                    }
                }
            }
        }
    }
    return newTab;
}
方法主要分成2个部分: 1. 重新计算新数组的容量(newCap)和阈值(newThr) 2. 重新声明新数组, 将旧数组内数据进行转换.
newCapacity
& newThreshold
// --- Step 1: compute the new capacity (newCap) and threshold (newThr) ---
if (oldCap > 0) {
    if (oldCap >= MAXIMUM_CAPACITY) {
        // Already at the maximum capacity: stop resizing from now on.
        threshold = Integer.MAX_VALUE;
        return oldTab;
    }
    else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
             oldCap >= DEFAULT_INITIAL_CAPACITY)
        newThr = oldThr << 1; // double threshold
}
// Initialization: the table does not exist yet.
else if (oldThr > 0) // initial capacity was placed in threshold
    newCap = oldThr;
else {               // zero initial threshold signifies using defaults
    newCap = DEFAULT_INITIAL_CAPACITY;
    newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
}
if (newThr == 0) {
    float ft = (float)newCap * loadFactor;
    newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
              (int)ft : Integer.MAX_VALUE);
}
threshold = newThr;

// --- Step 2: allocate the new array and move the old bins into it ---
@SuppressWarnings({"rawtypes","unchecked"})
Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
table = newTab;
if (oldTab != null) {
    for (int j = 0; j < oldCap; ++j) {
        Node<K,V> e;
        // Convert each non-empty bin in turn.
        if ((e = oldTab[j]) != null) {
            // Clear the old slot so the old table no longer references the nodes.
            oldTab[j] = null;
            // The bin holds a single node.
            if (e.next == null)
                newTab[e.hash & (newCap - 1)] = e;
            // The bin is a tree.
            else if (e instanceof TreeNode)
                ((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
            // The bin is a linked list.
            else { // preserve order
                Node<K,V> loHead = null, loTail = null;
                Node<K,V> hiHead = null, hiTail = null;
                Node<K,V> next;
                do {
                    next = e.next;
                    // Bit oldCap clear: node stays at index j ("lo" list).
                    if ((e.hash & oldCap) == 0) {
                        if (loTail == null)
                            loHead = e;
                        else
                            loTail.next = e;
                        loTail = e;
                    }
                    // Bit oldCap set: node moves to index j + oldCap ("hi" list).
                    else {
                        if (hiTail == null)
                            hiHead = e;
                        else
                            hiTail.next = e;
                        hiTail = e;
                    }
                } while ((e = next) != null);
                if (loTail != null) {
                    loTail.next = null;
                    newTab[j] = loHead;
                }
                if (hiTail != null) {
                    hiTail.next = null;
                    newTab[j + oldCap] = hiHead;
                }
            }
        }
    }
}
该过程主要可以分成如下几个步骤:
- 树节点: 调用 split 操作进行拆分.
- 链表节点: 根据 (e.hash & oldCap) == 0 是否成立, 将原槽位的链表拆分到新数组的低位和高位. 因为我们知道原数组长度为2^N
. 每次扩容都是扩容为原来的2倍. 所以, 原下标为 oldIndex 的节点在新数组中只可能落在 oldIndex
和 oldIndex+oldLength
2部分. 这也就是说的高位和低位.
/**
 * Returns the value stored for key, or null when getNode finds no node for it.
 * A null result is ambiguous: the key may also be mapped to a null value.
 *
 * @param key the key to look up
 * @return the mapped value, or null
 */
public V get(Object key) {
    Node<K,V> e;
    return (e = getNode(hash(key), key)) == null ? null : e.value;
}
/**
 * Looks up the node that holds key.
 *
 * @param hash the spread hash of the key (see hash(Object))
 * @param key  the key
 * @return the matching node, or null if the table is unallocated or empty,
 *         the bin is empty, or no node in the bin matches
 */
final Node<K,V> getNode(int hash, Object key) {
    Node<K,V>[] tab; Node<K,V> first, e; int n; K k;
    if ((tab = table) != null && (n = tab.length) > 0 &&
        (first = tab[(n - 1) & hash]) != null) {
        // Case 1: the first node of the bin matches.
        if (first.hash == hash && // always check first node
            ((k = first.key) == key || (key != null && key.equals(k))))
            return first;
        if ((e = first.next) != null) {
            // Case 2: tree bin, search the tree.
            if (first instanceof TreeNode)
                return ((TreeNode<K,V>)first).getTreeNode(hash, key);
            // Case 3: linked-list bin, scan the remaining nodes in order.
            do {
                if (e.hash == hash &&
                    ((k = e.key) == key || (key != null && key.equals(k))))
                    return e;
            } while ((e = e.next) != null);
        }
    }
    return null;
}
查询同样分成3种情况:
remove(Object key, Object value)
/**
 * Removes the mapping for key only when it is currently mapped to value.
 * Delegates to removeNode with matchValue = true and movable = true.
 *
 * @param key   the key
 * @param value the value the key must currently be mapped to
 * @return true if removeNode returned a node, i.e. a mapping was removed
 */
public boolean remove(Object key, Object value) {
    return removeNode(hash(key), key, value, true, true) != null;
}
removeNode(int hash, Object key, Object value, boolean matchValue, boolean movable)
/**
 * Shared implementation behind remove: finds the node for key and unlinks it.
 *
 * @param hash       the spread hash of the key (see hash(Object))
 * @param key        the key
 * @param value      the value to match; only consulted when matchValue is true
 * @param matchValue if true, remove only when the node's value equals value
 * @param movable    forwarded unchanged to removeTreeNode
 * @return the removed node, or null if nothing was removed
 */
final Node<K,V> removeNode(int hash, Object key, Object value,
                           boolean matchValue, boolean movable) {
    Node<K,V>[] tab; Node<K,V> p; int n, index;
    // Preconditions: table allocated, table length > 0, target bin not empty.
    if ((tab = table) != null && (n = tab.length) > 0 &&
        (p = tab[index = (n - 1) & hash]) != null) {
        Node<K,V> node = null, e; K k; V v;
        // Step 1: locate the node, using the same three cases as a lookup.
        if (p.hash == hash &&
            ((k = p.key) == key || (key != null && key.equals(k))))
            node = p;
        else if ((e = p.next) != null) {
            if (p instanceof TreeNode)
                node = ((TreeNode<K,V>)p).getTreeNode(hash, key);
            else {
                do {
                    if (e.hash == hash &&
                        ((k = e.key) == key ||
                         (key != null && key.equals(k)))) {
                        node = e;
                        break;
                    }
                    // p trails e, so p is the predecessor when a match is found.
                    p = e;
                } while ((e = e.next) != null);
            }
        }
        // Step 2: unlink it, provided the value matches when matchValue is set.
        if (node != null && (!matchValue || (v = node.value) == value ||
                             (value != null && value.equals(v)))) {
            // Tree node: the tree code removes it.
            if (node instanceof TreeNode)
                ((TreeNode<K,V>)node).removeTreeNode(this, tab, movable);
            // First node of a list bin: the bin now starts at its successor.
            else if (node == p)
                tab[index] = node.next;
            // Later node of a list bin: bypass it from its predecessor.
            else
                p.next = node.next;
            ++modCount;
            --size;
            afterNodeRemoval(node);
            return node;
        }
    }
    return null;
}
删除操作主要分成2步: 先按与查询相同的逻辑定位节点, 再根据节点类型(树节点 / 链表头节点 / 链表后续节点)将其摘除.
/**
 * Iterator over the keys: each call returns the key of the node supplied by nextNode().
 * NOTE(review): nextNode() is not part of this excerpt; presumably inherited from HashIterator.
 */
final class KeyIterator extends HashIterator
    implements Iterator<K> {
    public final K next() { return nextNode().key; }
}
/**
 * Iterator over the values: each call returns the value of the node supplied by nextNode().
 * NOTE(review): nextNode() is not part of this excerpt; presumably inherited from HashIterator.
 */
final class ValueIterator extends HashIterator
    implements Iterator<V> {
    public final V next() { return nextNode().value; }
}
final class EntryIterator extends HashIterator
implements Iterator> {
public final Map.Entry next() { return nextNode(); }
}
Iterator
未完待续. 等一起总结.
Q1. HashMap 采用什么样的结构设计?
数组+链表/红黑树.
Q2: HashMap的扩容机制?
Q3: HashMap的查询机制?
Q4: HashMap为什么线程不安全?
[1]. JDK1.8源码(七)——java.util.HashMap 类
[2]. 为什么返回除key.hashcode之外的(h = key.hashCode())^(h >>> 16)?
[3]. HashMap扩容时的rehash方法中(e.hash & oldCap) == 0算法推导