先介绍Redis散列表实现的几个重要数据结构:
字典项dictEntry:
/* One key/value pair held by a hash table.
 *
 * Entries whose keys land in the same bucket are linked together through
 * 'next' (separate chaining); the last entry of a chain has next == NULL. */
typedef struct dictEntry {
    void *key;              /* untyped pointer to the key */
    void *val;              /* untyped pointer to the associated value */
    struct dictEntry *next; /* following entry in the same bucket, or NULL */
} dictEntry;
/* Table of callbacks that specialises a dictionary for a given kind of key
 * and value.
 *
 * NOTE(review): none of the call sites are visible in this excerpt, so the
 * per-member notes below are inferred from the member names and signatures
 * only -- confirm against the dictionary implementation. */
typedef struct dictType {
    /* presumably maps a key to its hash value */
    unsigned int (*hashFunction)(const void *key);
    /* presumably returns a copy of 'key' */
    void *(*keyDup)(void *privdata, const void *key);
    /* presumably returns a copy of the value 'obj' */
    void *(*valDup)(void *privdata, const void *obj);
    /* presumably compares two keys */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    /* presumably releases a key */
    void (*keyDestructor)(void *privdata, void *key);
    /* presumably releases a value */
    void (*valDestructor)(void *privdata, void *obj);
} dictType;
/* A single hash table. Every dictionary owns two of these so that entries
 * can be migrated incrementally from the old table to the new one while a
 * rehash is in progress. */
typedef struct dictht {
    dictEntry **table;      /* bucket array; each slot is the head of a chain */
    unsigned long size;     /* number of slots in 'table' */
    unsigned long sizemask; /* size - 1; ANDed with a hash to select a slot */
    unsigned long used;     /* number of entries currently stored */
} dictht;
字典结构dict:
/* Top-level dictionary object: a callback table plus a pair of hash tables
 * and the bookkeeping needed for incremental rehashing. */
typedef struct dict {
    dictType *type; /* callback table used by this dictionary */
    void *privdata; /* opaque pointer; presumably handed to the dictType
                     * callbacks as their 'privdata' argument -- confirm */
    dictht ht[2];   /* ht[0] is the table in use; ht[1] is the destination
                     * table while a rehash is in progress */
    int rehashidx;  /* index of the next ht[0] bucket to migrate;
                     * rehashing not in progress if rehashidx == -1 */
    int iterators;  /* number of iterators currently running */
} dict;
/* Iteration state over a dictionary.
 *
 * When 'safe' is 1 this is a safe iterator: dictAdd, dictFind and other
 * functions may be called against the dictionary while iterating.
 * Otherwise it is a non-safe iterator, and only dictNext() should be called
 * while iterating.
 *
 * NOTE(review): the iterator implementation is not visible in this excerpt;
 * the notes on table/index/entry/nextEntry are inferred from their names. */
typedef struct dictIterator {
    dict *d;              /* dictionary being iterated */
    int table;            /* presumably which of d->ht[] is being walked */
    int index;            /* presumably the current bucket index */
    int safe;             /* 1 for a safe iterator, see above */
    dictEntry *entry;     /* presumably the entry last returned */
    dictEntry *nextEntry; /* presumably the entry to return next */
} dictIterator;
/* Public API of the dictionary (prototypes only).
 *
 * Of these, only dictExpand() and dictRehash() have their definitions
 * reproduced in this document; the behaviour of the remaining functions
 * is not shown here. */

/* Creation, sizing and modification */
dict *dictCreate(dictType *type, void *privDataPtr);
int dictExpand(dict *d, unsigned long size);
int dictAdd(dict *d, void *key, void *val);
int dictReplace(dict *d, void *key, void *val);
int dictDelete(dict *d, const void *key);
int dictDeleteNoFree(dict *d, const void *key);
void dictRelease(dict *d);

/* Lookup */
dictEntry * dictFind(dict *d, const void *key);
void *dictFetchValue(dict *d, const void *key);
int dictResize(dict *d);

/* Iteration */
dictIterator *dictGetIterator(dict *d);
dictIterator *dictGetSafeIterator(dict *d);
dictEntry *dictNext(dictIterator *iter);
void dictReleaseIterator(dictIterator *iter);

/* Miscellaneous helpers */
dictEntry *dictGetRandomKey(dict *d);
void dictPrintStats(dict *d);
unsigned int dictGenHashFunction(const unsigned char *buf, int len);
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len);
void dictEmpty(dict *d);
void dictEnableResize(void);
void dictDisableResize(void);

/* Incremental rehashing */
int dictRehash(dict *d, int n);
int dictRehashMilliseconds(dict *d, int ms);
下面分析扩张或者创建哈希表的重要函数dictExpand:
/* Expand or create the hash table.
 *
 * 'size' is the requested capacity; the real capacity is whatever
 * _dictNextPower(size) returns. On first use the new table simply becomes
 * ht[0]. Otherwise it is installed as ht[1] and rehashidx is set to 0 so
 * that incremental rehashing can begin.
 *
 * Returns DICT_OK on success, or DICT_ERR when a rehash is already in
 * progress or 'size' is smaller than the number of elements already stored
 * in ht[0]. */
int dictExpand(dict *d, unsigned long size)
{
    unsigned long realsize = _dictNextPower(size);
    dictht n; /* the new hashtable */

    /* A second expansion cannot start while one is still being rehashed. */
    if (dictIsRehashing(d))
        return DICT_ERR;

    /* The requested size must be able to hold what is already stored. */
    if (d->ht[0].used > size)
        return DICT_ERR;

    /* Allocate the new bucket array with every pointer set to NULL. */
    n.table = zcalloc(realsize * sizeof(dictEntry *));
    n.size = realsize;
    n.sizemask = realsize - 1;
    n.used = 0;

    if (d->ht[0].table != NULL) {
        /* A table is already in place: stage the new one as ht[1] and
         * start incremental rehashing from bucket 0. */
        d->ht[1] = n;
        d->rehashidx = 0;
    } else {
        /* First initialization: not really a rehash, just install the
         * first hash table so that it can accept keys. */
        d->ht[0] = n;
    }
    return DICT_OK;
}
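对于传入的参数size(期望的新哈希表大小),函数首先调用内部函数_dictNextPower(size)取得不小于size的最小的2的整数次幂realsize,作为哈希表的实际大小。如果字典正在rehash,或者已有元素个数d->ht[0].used大于size,则直接返回DICT_ERR。掩码sizemask取realsize-1,由于realsize是2的幂,其二进制表示为低位全1,哈希值与之按位与即可得到桶的下标。随后调用内存管理函数zcalloc为新哈希表的桶数组分配内存并将所有指针初始化为NULL。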
接下来,函数判断这是否是哈希表的首次初始化,这通过判断d->ht[0].table是否为NULL实现:如果为NULL,说明是首次初始化,则直接将新建的哈希表n赋给d->ht[0],并返回DICT_OK;否则,说明这是一次rehash,函数将n赋给第二个哈希表d->ht[1],并将d->rehashidx设为0,为后续的增量rehash做好准备,然后返回DICT_OK。
下面分析再哈希的实现dictRehash函数:
/* Performs N steps of incremental rehashing.
 *
 * One step moves one whole non-empty bucket of ht[0] (which may hold more
 * than one key, since collisions are chained) into ht[1].
 *
 * Parameters:
 *   d - the dictionary to work on
 *   n - maximum number of buckets to migrate during this call
 *
 * Returns 0 if no rehash is in progress, or if the start of a step finds
 * ht[0] empty and the tables are switched over. Returns 1 after n steps
 * have been performed. Emptiness is only tested at the start of a step,
 * so a call whose last step drains ht[0] still returns 1; the switch-over
 * then happens on the following call. */
int dictRehash(dict *d, int n) {
    /* Nothing to do unless dictExpand() has staged a second table. */
    if (!dictIsRehashing(d)) return 0;

    while(n--) {
        dictEntry *de, *nextde;

        /* Check if we already rehashed the whole table: if so, free the old
         * bucket array, promote ht[1] to ht[0], reset ht[1] (via _dictReset,
         * defined elsewhere) and mark rehashing as finished. */
        if (d->ht[0].used == 0) {
            zfree(d->ht[0].table);
            d->ht[0] = d->ht[1];
            _dictReset(&d->ht[1]);
            d->rehashidx = -1;
            return 0;
        }

        /* Skip empty buckets. rehashidx cannot run past the end of the
         * table here: ht[0].used != 0 guarantees that at least one
         * non-empty bucket is still ahead. */
        while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new table */
        while(de) {
            unsigned int h;

            /* Remember the successor first: de->next is overwritten below. */
            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            /* Relink the entry at the head of its new bucket; the entry
             * itself is moved, not copied. */
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        /* The old bucket is now empty; continue from the next index. */
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }
    return 1;
}
首先判断这是否是一次合法的rehash调用,通过判断(ht)->rehashidx!=-1实现。
然后,进行n步rehash。其中的每一步都重复如下步骤:
(1) 检查我们是否已经rehash了整个哈希表(此时d->ht[0].used为0),如果是,释放旧哈希表d->ht[0]的桶数组,用d->ht[1]取代d->ht[0],重置d->ht[1],将d->rehashidx置为-1,并返回0表示rehash已经完成。
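(2) 从d->rehashidx开始向后扫描d->ht[0].table,跳过空桶,直到找到第一个非空桶,取其链表头de,然后通过de->next遍历该桶中的所有字典项。对每个字典项,先保存nextde = de->next,再通过位操作dictHashKey(d, de->key) & d->ht[1].sizemask获得它在新哈希表d->ht[1]中的索引h,并将该字典项以头插法移动(而不是复制)到d->ht[1].table[h]的链表中,同时更新两个哈希表的used计数(d->ht[0].used减1,d->ht[1].used加1)。整个桶迁移完毕后,将d->ht[0].table[d->rehashidx]置为NULL,将d->rehashidx加1,然后回到步骤(1)继续下一步。n步全部执行完后,函数返回1。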