如无特别说明,源码分析基于JDK1.8
public class ArrayList extends AbstractList
implements List, RandomAccess, Cloneable, java.io.Serializable
数组的默认容量为10(使用无参构造函数创建时数组为空,第一次add时才会扩容到默认容量)。
private static final int DEFAULT_CAPACITY = 10;
// Appends e at the end of the list and returns true.
public boolean add(E e) {
// Make sure the backing array can hold size + 1 elements before writing.
ensureCapacityInternal(size + 1); // Increments modCount!!
// Store at the current end, then advance size.
elementData[size++] = e;
return true;
}
// Adjusts the requested capacity, then delegates to ensureExplicitCapacity.
private void ensureCapacityInternal(int minCapacity) {
// While elementData is still DEFAULTCAPACITY_EMPTY_ELEMENTDATA, request at least DEFAULT_CAPACITY slots.
if (elementData == DEFAULTCAPACITY_EMPTY_ELEMENTDATA) {
minCapacity = Math.max(DEFAULT_CAPACITY, minCapacity);
}
ensureExplicitCapacity(minCapacity);
}
// Increments modCount and grows the array when minCapacity exceeds its current length.
private void ensureExplicitCapacity(int minCapacity) {
modCount++;
// overflow-conscious code
// (the test is written as a subtraction compared with 0 rather than a direct comparison)
if (minCapacity - elementData.length > 0)
grow(minCapacity);
}
// Replaces elementData with a larger copy sized to hold at least minCapacity elements.
private void grow(int minCapacity) {
// overflow-conscious code
int oldCapacity = elementData.length;
// Candidate size: old length plus half of it (right shift by one), i.e. about 1.5x.
int newCapacity = oldCapacity + (oldCapacity >> 1);
// If the candidate is still below the request, use the requested capacity instead.
if (newCapacity - minCapacity < 0)
newCapacity = minCapacity;
// Beyond MAX_ARRAY_SIZE the final size is chosen by hugeCapacity (not shown in this excerpt).
if (newCapacity - MAX_ARRAY_SIZE > 0)
newCapacity = hugeCapacity(minCapacity);
// minCapacity is usually close to size, so this is a win:
elementData = Arrays.copyOf(elementData, newCapacity);
}
// Removes the element at index, shifts the following elements one slot left,
// and returns the removed element.
public E remove(int index) {
rangeCheck(index);
modCount++;
E oldValue = elementData(index);
// Number of elements located after index that have to move.
int numMoved = size - index - 1;
if (numMoved > 0)
System.arraycopy(elementData, index+1, elementData, index, numMoved);
elementData[--size] = null; // clear to let GC do its work
return oldValue;
}
transient Object[] elementData; // non-private to simplify nested class access
ArrayList 实现了writeObject() 和 readObject()来控制只序列化数组中有元素填充的那部分内容private void readObject(java.io.ObjectInputStream s)
throws java.io.IOException, ClassNotFoundException {
elementData = EMPTY_ELEMENTDATA;
// Read in size, and any hidden stuff
s.defaultReadObject();
// Read in capacity
s.readInt(); // ignored
if (size > 0) {
// be like clone(), allocate array based upon size not capacity
ensureCapacityInternal(size);
Object[] a = elementData;
// Read in all elements in the proper order.
for (int i=0; i
序列化时需要使用ObjectOutputStream的writeObject()将对象转化为字节流并输出。而writeObject()方法在传入的对象存在writeObject()的时候会去反射调用该对象的writeObject()来实现序列化。反序列化使用的是ObjectInputStream的readObject()方法。
ArrayList list = new ArrayList();
ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(file));
oos.writeObject(list);
// Synchronized append; always returns true.
// NOTE(review): presumably java.util.Vector.add — the introducing text is missing from this excerpt.
public synchronized boolean add(E e) {
modCount++;
// Ensure room for one more element, then store it and advance elementCount.
ensureCapacityHelper(elementCount + 1);
elementData[elementCount++] = e;
return true;
}
// Synchronized read of the element at index.
public synchronized E get(int index) {
// Rejects indexes at or beyond elementCount; negative indexes are not checked on this line.
if (index >= elementCount)
throw new ArrayIndexOutOfBoundsException(index);
return elementData(index);
}
List list = new ArrayList<>();
List synList = Collections.synchronizedList(list);
也可以使用concurrent并发包下的CopyOnWriteArrayList类。
List list = new CopyOnWriteArrayList<>();
// Appends e by building a new array one slot longer than the current one, while holding the lock.
public boolean add(E e) {
final ReentrantLock lock = this.lock;
lock.lock();
try {
Object[] elements = getArray();
int len = elements.length;
// Copy into a new array with one extra slot; the array returned by getArray() is not written to.
Object[] newElements = Arrays.copyOf(elements, len + 1);
newElements[len] = e;
// Install the new array as the current one.
setArray(newElements);
return true;
} finally {
// Release the lock on every exit path.
lock.unlock();
}
}
// Replaces the array reference held in the array field with a.
final void setArray(Object[] a) {
array = a;
}
// Returns a[index] cast to E; the cast is unchecked, hence the suppression.
@SuppressWarnings("unchecked")
private E get(Object[] a, int index) {
return (E) a[index];
}
private static class Node {
E item;
Node next;
Node prev;
}
每个链表存储了first和last指针:
transient Node first;
transient Node last;
以下源码分析以JDK1.7为主
存储结构
内部包含了一个Entry类型的数组table
transient Entry[] table;
Entry存储着键值对,它包含了四个字段,从next字段我们可以看出Entry是一个链表。即数组中的每个位置被当成一个桶,一个桶存放一个链表。HashMap使用拉链法来解决冲突,同一个链表中存放哈希值和散列桶取模运算结果相同的Entry。
拉链法的工作原理
HashMap map = new HashMap<>();
map.put("K1", "V1");
map.put("K2", "V2");
map.put("K3", "V3");
新建一个HashMap,默认大小为16;
插入
插入
应该注意到链表的插入是以头插法的方式进行的,即新插入的键值对会放在桶中链表的头部,而不是接在链表的尾部。
查找需要分成2步进行:
put操作
HashMap允许插入键为null的键值对。但是无法调用null的hashCode()方法,也就是无法确定该键值对的桶下标,只能通过强制指定一个桶下标来存放。HashMap使用第0个桶存放键为null的键值对。
确定桶下标
int hash = hash(key);
int i = indexFor(hash, table.length);
// Computes the hash value for key k that is then mapped to a bucket by indexFor.
final int hash(Object k) {
int h = hashSeed;
// With a non-zero seed, String keys are hashed by a separate routine.
if (0 != h && k instanceof String) {
return sun.misc.Hashing.stringHash32((String) k);
}
// Mix the key's own hashCode into the seed.
h ^= k.hashCode();
// This function ensures that hashCodes that differ only by
// constant multiples at each bit position have a bounded
// number of collisions (approximately 8 at default load factor).
h ^= (h >>> 20) ^ (h >>> 12);
return h ^ (h >>> 7) ^ (h >>> 4);
}
// Hash of an entry: XOR of the hashes of key and value (Objects.hashCode yields 0 for null).
public final int hashCode() {
return Objects.hashCode(key) ^ Objects.hashCode(value);
}
y : 10110010
x-1 : 00001111
y&(x-1) : 00000010
这个性质和y对x取模效果是一样的:
y : 10110010
x : 00010000
y%x : 00000010
我们知道位运算的代价比求模运算小得多,因此在进行这种计算时用位运算的话能带来更高的性能。
// Maps hash h to a bucket index by masking with length-1,
// which selects the low bits of h when length is a power of two.
static int indexFor(int h, int length) {
return h & (length-1);
}
扩容-基本原理
设HashMap的table长度为M,需要存储的键值对数量为N,如果哈希函数满足均匀性的要求,那么每条链表长度大约为N/M,因此平均查找次数的复杂度为O(N/M)。
为了让查找的成本降低,应该尽可能使得N/M尽可能小,因此需要保证M尽可能大,也就是说table要尽可能大。HashMap采用动态扩容来根据当前N值来调整M值,使得空间效率和时间效率都能得到保证。
和扩容相关的参数主要有 capacity、size、threshold、load_factor。
// Default table capacity.
static final int DEFAULT_INITIAL_CAPACITY = 16;
// Largest table capacity (2^30); resize stops growing the table once this is reached.
static final int MAXIMUM_CAPACITY = 1 << 30;
// Default load factor.
static final float DEFAULT_LOAD_FACTOR = 0.75f;
// Bucket array; each slot holds the head of a chain of entries.
transient Entry[] table;
// Number of key-value pairs currently stored.
transient int size;
// Resize trigger: compared with size in addEntry, set to capacity * loadFactor in resize.
int threshold;
// Factor used to derive threshold from the capacity.
final float loadFactor;
// Modification counter.
transient int modCount;
从下面的添加元素代码中可以看出,当需要扩容时,令capacity为原来的两倍。
void addEntry(int hash, K key, V value, int bucketIndex) {
Entry e = table[bucketIndex];
table[bucketIndex] = new Entry<>(hash, key, value, e);
if (size++ >= threshold)
}
扩容使用resize()实现,需要注意的是,扩容操作同样需要把oldTable的所有键值对重新插入newTable中,因此这一步是很费时的。
// Rehashes all entries into a new table of newCapacity buckets and recomputes threshold.
void resize(int newCapacity) {
Entry[] oldTable = table;
int oldCapacity = oldTable.length;
// Already at the maximum size: stop resizing by pushing threshold to Integer.MAX_VALUE.
if (oldCapacity == MAXIMUM_CAPACITY) {
threshold = Integer.MAX_VALUE;
return;
}
Entry[] newTable = new Entry[newCapacity];
// Move every entry from the current table into newTable.
transfer(newTable);
table = newTable;
threshold = (int)(newCapacity * loadFactor);
}
// Moves every entry of the current table into newTable, recomputing each bucket index.
void transfer(Entry[] newTable) {
Entry[] src = table;
int newCapacity = newTable.length;
for (int j = 0; j < src.length; j++) {
Entry e = src[j];
if (e != null) {
// Detach the chain from the old bucket before relinking its entries.
src[j] = null;
do {
// Remember the successor first, because e.next is overwritten below.
Entry next = e.next;
int i = indexFor(e.hash, newCapacity);
// Head insertion into the new bucket: entries sharing a new bucket end up in reverse order.
e.next = newTable[i];
newTable[i] = e;
e = next;
} while (e != null);
}
}
}
扩容-重新计算桶下标
在进行扩容时,需要把键值对重新放到对应的桶上。HashMap使用了一个特殊的机制,可以降低重新计算桶下标的操作。
假设原数组长度capacity为16,扩容之后new capacity为32:
capacity : 00010000
new capacity : 00100000
对于一个key,
它的哈希值如果在第5位上为0,那么取模得到的结果和之前一样;
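如果为1,那么得到的结果为原来的结果+16。这是因为新容量32对应的掩码00011111只比原来的掩码00001111多了第5位:hash & 31 = (hash & 15) + (hash & 16),所以新的桶下标要么等于原下标,要么等于原下标+16。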
计算数组容量
HashMap构造函数允许用户传入的容量不是2的n次方,因为它可以自动地将传入的容量转换为2的n次方。先考虑如何求一个数的掩码,对于10010000,它的掩码为11111111,可以用以下方法得到:
mask |= mask >> 1 11011000
mask |= mask >> 2 11111110
mask |= mask >> 4 11111111
mask + 1是大于原始数字的最小的2的n次方。
num 10010000
mask+1 100000000
以下是HashMap中计算数组容量的代码:
// Returns the smallest power of two that is >= cap, limited to the range [1, MAXIMUM_CAPACITY].
static final int tableSizeFor(int cap) {
// Subtract 1 first so that a cap that is already a power of two maps to itself.
int n = cap - 1;
// Copy the highest set bit into every lower position; shifts 1+2+4+8+16 cover all 32 bits.
n |= n >>> 1;
n |= n >>> 2;
n |= n >>> 4;
n |= n >>> 8;
n |= n >>> 16;
// n is now all ones below its highest bit, so n + 1 is a power of two.
// A negative n (e.g. cap of 0 or below) yields 1.
return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
}
链表转红黑树
从JDK1.8开始一个桶存储的链表长度大于8时会将链表转成红黑树。
与HashTable的比较
static final class HashEntry {
final int hash;
final K key;
volatile V value;
volatile HashEntry next;
}
ConcurrentHashMap和HashMap实现上类似,最主要的差别是ConcurrentHashMap采用了分段锁(Segment),每个分段锁维护着几个桶(HashEntry),多个线程可以同时访问不同分段锁上的桶,从而使其并发度更高(并发度就是Segment的个数)。
static final class Segment extends ReentrantLock implements Serializable {
private static final long serialVersionUID = 2249069246763182397L;
static final int MAX_SCAN_RETRIES =
Runtime.getRuntime().availableProcessors() > 1 ? 64 : 1;
transient volatile HashEntry[] table;
transient int count;
transient int modCount;
transient int threshold;
final float loadFactor;
final Segment[] segments;
}
默认的并发级别为16,也就是说默认创建16个Segment。
static final int DEFAULT_CONCURRENCY_LEVEL = 16;
/**
* The number of elements. Accessed only either within locks
* or among other volatile reads that maintain visibility.
*/
transient int count;
在执行size操作时,需要遍历所有Segment然后把count累计起来。
/**
* Number of unsynchronized retries in size and containsValue
* methods before resorting to locking. This is used to avoid
* unbounded retries if tables undergo continuous modification
* which would make it impossible to obtain an accurate result.
*/
static final int RETRIES_BEFORE_LOCK = 2;
// Returns the total number of entries over all segments, or Integer.MAX_VALUE on int overflow.
// The count is first taken without locks; once the retry limit is hit, every segment is locked.
public int size() {
// Try a few times to get accurate count. On failure due to
// continuous async changes in table, resort to locking.
final Segment[] segments = this.segments;
int size;
boolean overflow; // true if size overflows 32 bits
long sum; // sum of modCounts
long last = 0L; // previous sum
int retries = -1; // first iteration isn't retry
try {
for (;;) {
// Retry limit reached: lock every segment before counting again
if (retries++ == RETRIES_BEFORE_LOCK) {
for (int j = 0; j < segments.length; ++j)
ensureSegment(j).lock(); // force creation
}
// Start a fresh pass: reset the accumulators.
sum = 0L;
size = 0;
overflow = false;
for (int j = 0; j < segments.length; ++j) {
Segment seg = segmentAt(segments, j);
if (seg != null) {
sum += seg.modCount;
int c = seg.count;
// A negative count or a negative running total is treated as overflow.
if (c < 0 || (size += c) < 0)
overflow = true;
}
}
// Two consecutive passes produced the same modCount sum, so the result is taken as correct
if (sum == last)
break;
last = sum;
}
} finally {
// retries exceeds the limit only after the locking branch above has run; release those locks.
if (retries > RETRIES_BEFORE_LOCK) {
for (int j = 0; j < segments.length; ++j)
segmentAt(segments, j).unlock();
}
}
return overflow ? Integer.MAX_VALUE : size;
}
**存储结构**
继承自HashMap,因此具有和HashMap一样的快速查找特性。
public class LinkedHashMap extends HashMap implements Map
内部维护了一个双向链表,用来维护插入顺序或者LRU顺序。
/**
* The head (eldest) of the doubly linked list.
*/
transient LinkedHashMap.Entry head;
/**
* The tail (youngest) of the doubly linked list.
*/
transient LinkedHashMap.Entry tail;
accessOrder决定了顺序,默认为false,此时维护的是插入顺序。
final boolean accessOrder;
LinkedHashMap最重要的是以下用于维护顺序的函数,它们会在put、get等方法中调用。
// Hook for node access; this version has an empty body.
void afterNodeAccess(Node p) { }
// Hook for node insertion; this version has an empty body.
void afterNodeInsertion(boolean evict) { }
afterNodeAccess()
当一个节点被访问时,如果accessOrder为true,则会将该节点移到链表尾部。也就是说指定为LRU(最近最久未使用)顺序之后,在每次访问一个节点时,会将这个节点移到链表尾部,保证链表尾部是最近访问的节点,那么链表首部就是最近最久未使用的节点。
// When accessOrder is true and e is not already the tail, unlinks e from its
// current position in the doubly linked list and relinks it as the new tail.
void afterNodeAccess(Node e) { // move node to last
LinkedHashMap.Entry last;
if (accessOrder && (last = tail) != e) {
// p is the accessed node, b its predecessor, a its successor.
LinkedHashMap.Entry p =
(LinkedHashMap.Entry)e, b = p.before, a = p.after;
p.after = null;
// Unlink p on the predecessor side; with no predecessor, a becomes the head.
if (b == null)
head = a; else
b.after = a;
// Unlink p on the successor side; with no successor, b is used as the node to append after.
if (a != null)
a.before = b;
else
last = b;
// Append p after last; if there is no such node, p also becomes the head.
if (last == null)
head = p;
else {
p.before = last;
last.after = p;
}
tail = p;
++modCount;
}
}
afterNodeInsertion()
在put等操作之后执行,当removeEldestEntry()方法返回true时会移除最老的节点,也就是链表首部节点first。evict只有在构建Map的时候才为false,在这里为true。
// Removes the head of the linked list when evict is true, the list is not empty,
// and removeEldestEntry(head) returns true.
void afterNodeInsertion(boolean evict) { // possibly remove eldest
LinkedHashMap.Entry first;
if (evict && (first = head) != null && removeEldestEntry(first)) {
K key = first.key;
// Remove the head entry from the map by its key.
removeNode(hash(key), key, null, false, true);
}
}
removeEldestEntry()默认返回false,如果需要让它返回true,需要继承LinkedHashMap并且覆盖这个方法的实现,这在实现LRU的缓存中特别有用,通过移除最近最久未使用的节点,从而保证缓存空间足够,并且缓存的数据都是热点数据。
// Decides whether the eldest entry should be removed after an insertion.
// This version always returns false; subclasses can override it.
protected boolean removeEldestEntry(Map.Entry eldest) {
return false;
}
LRU缓存
以下是使用LinkedHashMap实现的一个LRU缓存:
/**
 * Minimal LRU cache built on an access-ordered {@link LinkedHashMap}.
 *
 * The map is created with {@code accessOrder = true}, so every access moves the
 * entry to the tail of the internal list. After an insertion the eldest entry
 * (the head of the list) is dropped whenever the size exceeds {@code MAX_ENTRIES}.
 *
 * @param <K> key type
 * @param <V> value type
 */
class LRUCache<K, V> extends LinkedHashMap<K, V> {
    /** Maximum number of entries kept in the cache. */
    private static final int MAX_ENTRIES = 3;

    /** Creates an empty cache that orders its entries by access. */
    LRUCache() {
        // Third argument true selects access order instead of insertion order.
        super(MAX_ENTRIES, 0.75f, true);
    }

    /**
     * Requests removal of the eldest entry once the cache holds more than
     * {@code MAX_ENTRIES} entries.
     *
     * @param eldest the least recently accessed entry
     * @return {@code true} if the eldest entry should be removed
     */
    @Override
    protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
        return size() > MAX_ENTRIES;
    }
}
// Demonstrates LRU eviction with the three-entry LRUCache.
public static void main(String[] args) {
LRUCache cache = new LRUCache<>();
cache.put(1, "a");
cache.put(2, "b");
cache.put(3, "c");
// Reading key 1 moves it to the tail of the access-ordered list, leaving key 2 as the eldest.
cache.get(1);
// The fourth insertion pushes size() above MAX_ENTRIES, so the eldest entry (key 2) is removed.
cache.put(4, "d");
// Prints the remaining keys from eldest to most recently used.
System.out.println(cache.keySet());
}
[3, 1, 4]