C# Dictionary的实现原理

今天来学习一下Dictionary的源码底层实现
一、全局变量

		// Basic storage unit of the dictionary. Each entry behaves like a node of a singly linked
		// chain: entries that map to the same bucket are linked together through `next`.
        private struct Entry 
        {
            public int hashCode;    // Lower 31 bits of hash code, -1 if unused
            public int next;        // Index in the entries array of the next entry chained on the same bucket, -1 if last (Remove also uses it to link freed entries)
            public TKey key;        // Key of entry
            public TValue value;    // Value of entry
        }
		// Hash buckets. Initialize sizes this array with HashHelpers.GetPrime(capacity). The index is the
		// bucket slot (targetBucket = hashCode % buckets.Length); the stored value is the index, in the
		// entries array, of the head of the chain of entries that landed on that slot (-1 when the slot is empty).
        private int[] buckets;
        // Holds the actual key/value data; Initialize and Resize give it the same length as buckets.
        private Entry[] entries;
        private int count;     // Number of entries slots handed out so far: Insert increments it, Remove leaves it unchanged and increments freeCount instead. NOTE(review): the original note says Clear resets it to 0 - Clear is not shown here, confirm.
        private int version;   // Incremented by Insert and Remove each time they modify the dictionary
        private int freeList;  // Index of the head of the chain of removed entries; Insert reuses these slots before taking a fresh one
        private int freeCount; // Number of removed entries currently waiting on the free list
        private IEqualityComparer<TKey> comparer;
        private KeyCollection keys;
        private ValueCollection values;
        private Object _syncRoot;

二、初始化

		// Precomputed table of prime numbers in ascending order (a selection, not every prime).
		// NOTE(review): presumably this is the table HashHelpers.GetPrime consults when it picks
		// an array size - confirm against the HashHelpers source.
        public static readonly int[] primes = 
        {
            3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
            1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
            17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
            187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
            1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369
        };
        private void Initialize(int capacity)
        {
            // Ask HashHelpers for the prime to use as the array length. When no capacity was
            // supplied, Insert calls this with capacity = 0.
            // NOTE(review): the original note says capacity 0 yields size 3 - confirm in HashHelpers.GetPrime.
            int size = HashHelpers.GetPrime(capacity);

            // Every bucket starts out empty; -1 is the "no entry" marker.
            int[] emptyBuckets = new int[size];
            for (int slot = 0; slot < emptyBuckets.Length; slot++)
            {
                emptyBuckets[slot] = -1;
            }
            buckets = emptyBuckets;

            entries = new Entry[size];

            // No removed entries yet, so the free list is empty as well.
            freeList = -1;
        }

三、Hash碰撞

  • HashCode
    0x7FFFFFFF是十六进制表示的int最大值(int.MaxValue,即2147483647),与它按位与(&)会把符号位清零、只保留低31位,从而得到非负的哈希码
		int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
  • 原理
    主要是为了得到桶的槽位信息,首先根据key获取hashCode,然后用hashCode对hash桶数组的长度取模,得到该key映射(碰撞)到的槽位,槽位上存的值就是该槽位链表头结点在entries中的索引,沿链表比较key即可确定该元素在entries中的位置
		int targetBucket = hashCode % buckets.Length;
  • 冲突
    不同的key有可能碰撞到同一个槽位上,如:4 % 5 = 4,9 % 5 = 4,不同的键4、9都碰撞到了索引为4的槽上
  • 拉链法
    将每一个元素视为一个单链表结点,如果该槽位只对应一个元素,则该单链表只有一个结点,碰撞到同一个槽位上的元素之间通过next指针建立联系,查找时如果链表不止一个结点,遍历该单链表即可

四、属性与方法

  • 注意各种长度
    size: Entry数组与Hash桶数组(以下简称数组)各自的长度(两个数组长度相同),质数,扩容的临界长度,所有长度中最大
    capacity: 程序员可以手动设置的容量,只在字典初始化时用,用于决定数组的大小,程序员没有手动设置时,程序会在Insert方法时以0进行初始化,此时得到的size是最小质数3
    count: 所有数组中曾经添加过元素的长度,等于size时扩容
    freeCount: 数组中某位置之前添加元素了,后又被删除了,目前没有元素,这样的位置的总和
    Count: 所有字典或数组中实际目前存在的元素个数,暴露给外界的接口

  • 获取字典的长度:总长度减去删除的长度

        public int Count
        {
            get
            {
                // Slots handed out so far, minus the ones that were removed and sit on the free list.
                int liveEntries = count - freeCount;
                return liveEntries;
            }
        }
  • 字典元素的增加:字典的Add过程
        // Inserts a key/value pair, or overwrites the value of an existing key.
        //   key   - key to insert; a null key is reported through ThrowHelper.ThrowArgumentNullException.
        //   value - value to store for the key.
        //   add   - when true, an already-present key is reported through ThrowHelper.ThrowArgumentException
        //           (Argument_AddingDuplicate); when false, the existing value is overwritten.
        // The arrays are created lazily with Initialize(0) on the first insert.
        private void Insert(TKey key, TValue value, bool add) {
            if( key == null ) {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
            }
            if (buckets == null) Initialize(0);
            // Map the key to a bucket: clear the sign bit of the hash code, then take it modulo the bucket count
            int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
            int targetBucket = hashCode % buckets.Length;
#if FEATURE_RANDOMIZED_STRING_HASHING
            int collisionCount = 0;
#endif
            // i >= 0 means at least one entry is already chained on this bucket; walk that chain
            for (int i = buckets[targetBucket]; i >= 0; i = entries[i].next) {
                // First check whether the key is already present in the dictionary
                if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) {
                    if (add) { 
ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_AddingDuplicate);
                    }
                    entries[i].value = value;
                    version++;
                    return;
                } 
#if FEATURE_RANDOMIZED_STRING_HASHING
                collisionCount++;
#endif
            }
            int index;
            // Entries were removed earlier: reuse the slot at the head of the free list first
            if (freeCount > 0) {
                index = freeList;
                freeList = entries[index].next;
                freeCount--;
            }
            else {
                if (count == entries.Length) // No unused slot left: grow the arrays
                {
                    Resize();
                    // The bucket count changed, so the target bucket must be recomputed
                    targetBucket = hashCode % buckets.Length;
                }
                index = count;
                count++;
            }
            // Make the new entry the head of the bucket's chain, pointing at the previous head
            entries[index].hashCode = hashCode;
            entries[index].next = buckets[targetBucket];
            entries[index].key = key;
            entries[index].value = value;
            buckets[targetBucket] = index;
            version++;
#if FEATURE_RANDOMIZED_STRING_HASHING
#if FEATURE_CORECLR
            // In case we hit the collision threshold we'll need to switch to the comparer which is using randomized string hashing
            // in this case will be EqualityComparer.Default.
            // Note, randomized string hashing is turned on by default on coreclr so EqualityComparer.Default will 
            // be using randomized string hashing
            if (collisionCount > HashHelpers.HashCollisionThreshold && comparer == NonRandomizedStringEqualityComparer.Default) 
            {
                comparer = (IEqualityComparer<TKey>) EqualityComparer<string>.Default;
                Resize(entries.Length, true);
            }
#else
            // When the collision count exceeds the threshold, swap in a randomized comparer and call Resize.
            // This Resize does not enlarge the arrays; it recomputes every hash code with the new comparer.
            if(collisionCount > HashHelpers.HashCollisionThreshold && HashHelpers.IsWellKnownEqualityComparer(comparer)) 
            {
                comparer = (IEqualityComparer<TKey>) HashHelpers.GetRandomizedEqualityComparer(comparer);
                Resize(entries.Length, true); // Same length as before; the second argument forces new hash codes
            }
#endif // FEATURE_CORECLR
#endif
        } 
public const int HashCollisionThreshold = 100; // Default collision-count threshold is 100
  • 字典的扩容:两种情况,重建hash链,触发详见Insert方法
  1. 容量不足时扩容,调用HashHelpers.ExpandPrime方法,先把容量扩大为原来的2倍,再通过GetPrime取对应的质数作为新的size
  2. 碰撞次数超过阈值时扩容,根据源码并没有扩大数组的大小,只是重新计算了HashCode(更改了comparer)
        // Grows the arrays to the size HashHelpers.ExpandPrime picks for the current count,
        // keeping the stored hash codes as they are.
        private void Resize()
        {
            int expandedSize = HashHelpers.ExpandPrime(count);
            Resize(expandedSize, false);
        }
        // Returns the size to grow to from oldSize: GetPrime(2 * oldSize), capped at MaxPrimeArrayLength.
        public static int ExpandPrime(int oldSize)
        {
            int doubled = 2 * oldSize;

            // Allow the hashtables to grow to maximum possible size (~2G elements) before encountering capacity overflow.
            // Note that this check works even when the doubling overflowed, thanks to the (uint) cast.
            bool pastMaximum = (uint)doubled > MaxPrimeArrayLength;
            if (!pastMaximum || MaxPrimeArrayLength <= oldSize)
            {
                return GetPrime(doubled);
            }

            Contract.Assert( MaxPrimeArrayLength == GetPrime(MaxPrimeArrayLength), "Invalid MaxPrimeArrayLength");
            return MaxPrimeArrayLength;
        }
        // Rebuilds both arrays at length newSize and relinks every live entry into its new bucket.
        //   newSize           - new length of the buckets and entries arrays.
        //   forceNewHashCodes - when true, every live entry's hash code is recomputed with the current
        //                       comparer first (Insert passes true after swapping the comparer because
        //                       of too many collisions).
        private void Resize(int newSize, bool forceNewHashCodes)
        {
            Contract.Assert(newSize >= entries.Length);

            // Fresh bucket table with every slot marked empty.
            int[] rebuiltBuckets = new int[newSize];
            for (int b = 0; b < rebuiltBuckets.Length; b++)
            {
                rebuiltBuckets[b] = -1;
            }

            // Carry the existing entries over into the larger array.
            Entry[] grownEntries = new Entry[newSize];
            Array.Copy(entries, 0, grownEntries, 0, count);

            if (forceNewHashCodes)
            {
                for (int n = 0; n < count; n++)
                {
                    // -1 marks a removed entry; leave it alone.
                    if (grownEntries[n].hashCode == -1)
                    {
                        continue;
                    }
                    grownEntries[n].hashCode = (comparer.GetHashCode(grownEntries[n].key) & 0x7FFFFFFF);
                }
            }

            // Rebuild the chains: each live entry becomes the new head of its bucket's chain.
            for (int n = 0; n < count; n++)
            {
                if (grownEntries[n].hashCode < 0)
                {
                    continue;
                }
                int target = grownEntries[n].hashCode % newSize;
                grownEntries[n].next = rebuiltBuckets[target];
                rebuiltBuckets[target] = n;
            }

            buckets = rebuiltBuckets;
            entries = grownEntries;
        }
  • 字典元素的删除:只需要删除键,有一个bool返回值
        // Removes the entry stored under key.
        // Returns true when an entry was found and removed, false otherwise.
        // A null key is reported through ThrowHelper.ThrowArgumentNullException.
        public bool Remove(TKey key)
        {
            if (key == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
            }
            if (buckets == null)
            {
                return false;
            }

            int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
            int slot = hashCode % buckets.Length;
            int previous = -1;            // entry visited just before `current` in the chain
            int current = buckets[slot];  // starts at the head of the bucket's chain

            while (current >= 0)
            {
                if (entries[current].hashCode == hashCode && comparer.Equals(entries[current].key, key))
                {
                    int successor = entries[current].next;
                    if (previous < 0)
                    {
                        // Removing the head: the bucket now points at the following entry.
                        buckets[slot] = successor;
                    }
                    else
                    {
                        // Removing from the middle or end: bypass the entry.
                        entries[previous].next = successor;
                    }

                    // Mark the slot unused, clear its contents and push it onto the free list.
                    entries[current].hashCode = -1;
                    entries[current].next = freeList;
                    entries[current].key = default(TKey);
                    entries[current].value = default(TValue);
                    freeList = current;
                    freeCount++;
                    version++;
                    return true;
                }

                previous = current;
                current = entries[current].next;
            }
            return false;
        }
  • 字典元素的查找:根据键、值查找
  1. 按照键查找:平均时间复杂度O(1),通过哈希直接定位到槽位,只需遍历该槽位上的链表而不必遍历整个数组,速度较快
        // Returns true when an entry with the given key exists (FindEntry returns a non-negative index).
        public bool ContainsKey(TKey key)
        {
            int entryIndex = FindEntry(key);
            return entryIndex >= 0;
        }
        // Returns the index in the entries array of the entry stored under key, or -1 when there is none.
        // A null key is reported through ThrowHelper.ThrowArgumentNullException.
        private int FindEntry(TKey key)
        {
            if (key == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
            }
            if (buckets == null)
            {
                return -1;
            }

            int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;

            // Walk the chain hanging off the key's bucket until the key matches or the chain ends.
            int cursor = buckets[hashCode % buckets.Length];
            while (cursor >= 0)
            {
                if (entries[cursor].hashCode == hashCode && comparer.Equals(entries[cursor].key, key))
                {
                    return cursor;
                }
                cursor = entries[cursor].next;
            }
            return -1;
        }
  2. 按照值查找:时间复杂度O(n),需要遍历数组中的所有元素,时间较慢
        // Returns true when some live entry holds the given value. Scans the first `count` slots of
        // the entries array and skips removed ones (hashCode < 0).
        public bool ContainsValue(TValue value)
        {
            if (value != null)
            {
                EqualityComparer<TValue> valueComparer = EqualityComparer<TValue>.Default;
                for (int slot = 0; slot < count; slot++)
                {
                    if (entries[slot].hashCode < 0)
                    {
                        continue;
                    }
                    if (valueComparer.Equals(entries[slot].value, value))
                    {
                        return true;
                    }
                }
                return false;
            }

            // Searching for null: a direct null check on each live entry is enough.
            for (int slot = 0; slot < count; slot++)
            {
                if (entries[slot].hashCode >= 0 && entries[slot].value == null)
                {
                    return true;
                }
            }
            return false;
        }

基于.NET Framework 4.8

你可能感兴趣的:(Unity,C#)