今天来学习一下 Dictionary 的源码底层实现。
一、全局变量
// Basic storage unit of the dictionary: one key/value pair that also acts as a node of a singly linked chain.
private struct Entry
{
public int hashCode; // Lower 31 bits of hash code, -1 if unused
public int next; // Index in the entries array of the next node chained on the same bucket slot, -1 if last
public TKey key; // Key of entry
public TValue value; // Value of entry
}
// Hash buckets. Initialize allocates this array with length HashHelpers.GetPrime(capacity).
// The index is the bucket slot (hashCode % buckets.Length); the value is the index in entries of the
// head node of the chain of entries that collided on that slot, or -1 when the slot is empty.
private int[] buckets;
// Holds the actual key/value data; Initialize and Resize allocate it with the same length as buckets.
private Entry[] entries;
private int count; // Number of entries slots handed out so far; Remove does not decrease it (it increments freeCount instead). The original note says Clear resets it to 0 - Clear is not shown here.
private int version; // Incremented by Insert and Remove every time they modify the dictionary.
private int freeList; // Index of the head of the chain of removed entries (-1 when empty); Insert reuses these slots first.
private int freeCount; // Number of removed entries currently on the free list; starts at 0.
private IEqualityComparer<TKey> comparer; // Supplies GetHashCode and Equals for keys.
private KeyCollection keys;
private ValueCollection values;
private Object _syncRoot;
二、初始化
// Precomputed, ascending table of primes (a selection of primes, not every prime).
// NOTE(review): presumably this is the table HashHelpers.GetPrime consults - confirm against HashHelpers.
public static readonly int[] primes =
{
3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369
};
// Allocates the bucket and entry arrays for the requested capacity and empties the free list.
// capacity: requested capacity; the actual array length is HashHelpers.GetPrime(capacity).
// Insert calls this with capacity 0 when the dictionary has not been initialized yet
// (the article notes the resulting size is then 3, the first value of the prime table).
private void Initialize(int capacity)
{
    int primeSize = HashHelpers.GetPrime(capacity);

    // Every bucket slot starts out empty, which is encoded as -1.
    int[] freshBuckets = new int[primeSize];
    for (int slot = 0; slot < freshBuckets.Length; slot++)
    {
        freshBuckets[slot] = -1;
    }

    buckets = freshBuckets;
    entries = new Entry[primeSize];
    freeList = -1;
}
三、Hash碰撞
0x7FFFFFFF 是16进制表示的最大32位有符号整数(int.MaxValue)。与哈希码做按位与运算是为了清除符号位,得到非负的哈希码:
int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
先由 key 获取 hashCode,然后用 hashCode 与 hash 桶进行碰撞(对桶的长度取模),以获取碰撞到的槽位;根据槽位上的值即可确定该元素在 entries 中的位置:
int targetBucket = hashCode % buckets.Length;
不同的 key 有可能碰撞到同一个槽位上,如:4%5=4 和 9%5=4,不同的键 4、9 都碰撞到了索引为 4 的槽上。
碰撞到同一个槽位上的元素通过 next 指针建立联系(组成单链表),查找时如果链表不止一个结点,遍历该单链表即可。
四、属性与方法
注意各种长度
size: Entry数组与Hash桶数组(以下简称数组)的总长度,质数,扩容的临界长度,所有长度中最大
capacity: 程序员可以手动设置的容量,只在字典初始化时用,用于决定数组的大小;程序员没有手动设置时,程序会在 Insert 方法中以 0 进行初始化,此时得到的 size 是最小质数 3
count: 所有数组中曾经添加过元素的长度,等于size时扩容
freeCount: 数组中某位置之前添加元素了,后又被删除了,目前没有元素,这样的位置的总和
Count: 所有字典或数组中实际目前存在的元素个数,暴露给外界的接口
获取字典的长度:总长度减去删除的长度
// Number of key/value pairs currently stored: the entry slots handed out so far (count)
// minus the slots that were removed and now sit on the free list (freeCount).
public int Count
{
    get
    {
        int liveEntries = count - freeCount;
        return liveEntries;
    }
}
// Adds a key/value pair, or overwrites the value of an existing key.
// key:   the key to insert; a null key is reported through ThrowHelper.ThrowArgumentNullException.
// value: the value to store for the key.
// add:   when true, finding the key already present is reported through
//        ThrowHelper.ThrowArgumentException(Argument_AddingDuplicate); when false, the existing value is overwritten.
private void Insert(TKey key, TValue value, bool add) {
if( key == null ) {
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
}
// Lazily allocate the arrays on first insert when no capacity was supplied.
if (buckets == null) Initialize(0);
// Hash collision: mask off the sign bit, then map the hash code onto a bucket slot.
int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
int targetBucket = hashCode % buckets.Length;
#if FEATURE_RANDOMIZED_STRING_HASHING
int collisionCount = 0;
#endif
// i >= 0 means earlier entries already collided on this slot, so the slot's chain has at least one node.
for (int i = buckets[targetBucket]; i >= 0; i = entries[i].next) {
// First check whether the key already exists in the dictionary.
if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) {
if (add) {
ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_AddingDuplicate);
}
entries[i].value = value;
version++;
return;
}
#if FEATURE_RANDOMIZED_STRING_HASHING
// Count the chain nodes walked that did not match the key.
collisionCount++;
#endif
}
int index;
// Entries were removed earlier: reuse a slot from the free list first.
if (freeCount > 0) {
index = freeList;
freeList = entries[index].next;
freeCount--;
}
else {
if (count == entries.Length) // The entries array is full: grow it
{
Resize();
// buckets was replaced by Resize, so the slot must be recomputed against the new length.
targetBucket = hashCode % buckets.Length;
}
index = count;
count++;
}
// Make the new node the head of the slot's chain, pointing at the previous head.
entries[index].hashCode = hashCode;
entries[index].next = buckets[targetBucket];
entries[index].key = key;
entries[index].value = value;
buckets[targetBucket] = index;
version++;
#if FEATURE_RANDOMIZED_STRING_HASHING
#if FEATURE_CORECLR
// In case we hit the collision threshold we'll need to switch to the comparer which is using randomized string hashing
// in this case will be EqualityComparer.Default.
// Note, randomized string hashing is turned on by default on coreclr so EqualityComparer.Default will
// be using randomized string hashing
if (collisionCount > HashHelpers.HashCollisionThreshold && comparer == NonRandomizedStringEqualityComparer.Default)
{
comparer = (IEqualityComparer<TKey>) EqualityComparer<string>.Default;
Resize(entries.Length, true);
}
#else
// If the collision count exceeds the threshold, call Resize. Note that this Resize does not grow the
// arrays; it recomputes the hash codes instead (the comparer has been changed).
if(collisionCount > HashHelpers.HashCollisionThreshold && HashHelpers.IsWellKnownEqualityComparer(comparer))
{
comparer = (IEqualityComparer<TKey>) HashHelpers.GetRandomizedEqualityComparer(comparer);
Resize(entries.Length, true); // Note the length is unchanged here; mind the two arguments
}
#endif // FEATURE_CORECLR
#endif
}
public const int HashCollisionThreshold = 100; // Default collision-count threshold is 100; Insert compares its per-insert collisionCount against it
// Grows the table when the entries array is full.
// HashHelpers.ExpandPrime first doubles the current size and then picks a prime for the doubled size.
// Hash codes are not recomputed on this path (forceNewHashCodes is false); recomputing the hash codes
// is requested only by Insert's collision-threshold path, which changes the comparer.
private void Resize()
{
    int grownSize = HashHelpers.ExpandPrime(count);
    Resize(grownSize, false);
}
// Computes the size to grow to from oldSize: GetPrime(2 * oldSize), capped at MaxPrimeArrayLength.
// oldSize: the current size.
// Returns MaxPrimeArrayLength when doubling would exceed it and oldSize is still below it;
// otherwise returns GetPrime of the doubled size.
public static int ExpandPrime(int oldSize)
{
    int doubled = 2 * oldSize;

    // Allow the hashtables to grow to maximum possible size (~2G elements) before encountering capacity overflow.
    // The (uint) cast makes the check work even when the doubling overflowed into a negative int.
    bool pastLimit = (uint)doubled > MaxPrimeArrayLength;
    if (!pastLimit || MaxPrimeArrayLength <= oldSize)
    {
        return GetPrime(doubled);
    }

    Contract.Assert(MaxPrimeArrayLength == GetPrime(MaxPrimeArrayLength), "Invalid MaxPrimeArrayLength");
    return MaxPrimeArrayLength;
}
// Rebuilds the bucket and entry arrays at the given size and relinks every live entry.
// newSize:           length of the new arrays; asserted to be >= entries.Length.
// forceNewHashCodes: when true, the stored hash code of every live entry is recomputed with the
//                    current comparer before relinking. Insert passes true after it swapped the
//                    comparer because too many collisions occurred.
private void Resize(int newSize, bool forceNewHashCodes)
{
    Contract.Assert(newSize >= entries.Length);

    // All slots of the new bucket array start out empty (-1).
    int[] rebuiltBuckets = new int[newSize];
    for (int slot = 0; slot < rebuiltBuckets.Length; slot++)
    {
        rebuiltBuckets[slot] = -1;
    }

    // Carry the existing entries over at their original indices.
    Entry[] rebuiltEntries = new Entry[newSize];
    Array.Copy(entries, 0, rebuiltEntries, 0, count);

    if (forceNewHashCodes)
    {
        for (int n = 0; n < count; n++)
        {
            // Entries marked -1 were removed; they have no key to rehash.
            if (rebuiltEntries[n].hashCode == -1)
            {
                continue;
            }
            rebuiltEntries[n].hashCode = comparer.GetHashCode(rebuiltEntries[n].key) & 0x7FFFFFFF;
        }
    }

    // Rebuild the hash chains: each live entry becomes the new head of its slot's chain.
    for (int n = 0; n < count; n++)
    {
        int storedHash = rebuiltEntries[n].hashCode;
        if (storedHash < 0)
        {
            continue;
        }

        int slot = storedHash % newSize;
        rebuiltEntries[n].next = rebuiltBuckets[slot];
        rebuiltBuckets[slot] = n;
    }

    buckets = rebuiltBuckets;
    entries = rebuiltEntries;
}
// Removes the entry with the given key.
// key: the key to remove; a null key is reported through ThrowHelper.ThrowArgumentNullException.
// Returns a bool: true when the key was found and removed, false otherwise
// (including when the dictionary has never been initialized).
public bool Remove(TKey key)
{
    if (key == null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
    }

    if (buckets == null)
    {
        return false;
    }

    int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
    int slot = hashCode % buckets.Length;

    int previous = -1;            // Previous node in the chain, -1 while at the head.
    int current = buckets[slot];  // entries[buckets[slot]] is the head node of the chain.
    while (current >= 0)
    {
        bool matches = entries[current].hashCode == hashCode && comparer.Equals(entries[current].key, key);
        if (matches)
        {
            int successor = entries[current].next;
            if (previous < 0)
            {
                // Removing the head node: the slot now points at the second node.
                buckets[slot] = successor;
            }
            else
            {
                // Removing a node further down the chain: unlink it from its predecessor.
                entries[previous].next = successor;
            }

            // Mark the slot unused, clear its data, and push it onto the free list.
            entries[current].hashCode = -1;
            entries[current].next = freeList;
            entries[current].key = default(TKey);
            entries[current].value = default(TValue);
            freeList = current;
            freeCount++;
            version++;
            return true;
        }

        previous = current;
        current = entries[current].next;
    }

    return false;
}
// Reports whether the dictionary holds an entry for the given key.
// Returns true when FindEntry locates the key (a non-negative index), false otherwise.
public bool ContainsKey(TKey key)
{
    int entryIndex = FindEntry(key);
    return entryIndex >= 0;
}
// Looks up the index in the entries array of the entry holding the given key.
// key: the key to find; a null key is reported through ThrowHelper.ThrowArgumentNullException.
// Returns the entry's index, or -1 when the key is absent or the dictionary was never initialized.
private int FindEntry(TKey key)
{
    if (key == null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
    }

    if (buckets == null)
    {
        return -1;
    }

    int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;

    // Walk the chain that starts at the key's bucket slot.
    int cursor = buckets[hashCode % buckets.Length];
    while (cursor >= 0)
    {
        if (entries[cursor].hashCode == hashCode && comparer.Equals(entries[cursor].key, key))
        {
            return cursor;
        }
        cursor = entries[cursor].next;
    }

    return -1;
}
// Reports whether any live entry stores the given value, scanning the handed-out entry slots linearly.
// A null value is matched with a direct null comparison; any other value is matched with
// EqualityComparer<TValue>.Default. Removed entries (negative hashCode) are skipped.
public bool ContainsValue(TValue value)
{
    if (value != null)
    {
        EqualityComparer<TValue> valueComparer = EqualityComparer<TValue>.Default;
        for (int slot = 0; slot < count; slot++)
        {
            if (entries[slot].hashCode < 0)
            {
                continue;
            }
            if (valueComparer.Equals(entries[slot].value, value))
            {
                return true;
            }
        }
        return false;
    }

    for (int slot = 0; slot < count; slot++)
    {
        if (entries[slot].hashCode >= 0 && entries[slot].value == null)
        {
            return true;
        }
    }
    return false;
}
基于.NET Framework 4.8