xhash是jabberd2的哈希表, 并提供了迭代器用于遍历xhash.
解释一下结构体的命名: xht_struct的意思是x hash table, xhn_struct的意思是x hash node, 这样方便理解记忆.
xhn_struct的成员变量顾名思义, 不赘述.
xht_struct中, p是内存池, 负责node的分配等, zen是桶数组, free_list是回收的node内存, iter_bucket和iter_node被用于记录迭代器的位置.
typedef struct xhn_struct { struct xhn_struct *next; struct xhn_struct *prev; const char *key; int keylen; void *val; } *xhn, _xhn; typedef struct xht_struct { pool_t p; int prime; int dirty; int count; struct xhn_struct *zen; struct xhn_struct *free_list; // list of zaped elements to be reused. int iter_bucket; xhn iter_node; int *stat; } *xht, _xht; JABBERD2_API xht xhash_new(int prime); JABBERD2_API void xhash_put(xht h, const char *key, void *val); JABBERD2_API void xhash_putx(xht h, const char *key, int len, void *val); JABBERD2_API void *xhash_get(xht h, const char *key); JABBERD2_API void *xhash_getx(xht h, const char *key, int len); JABBERD2_API void xhash_zap(xht h, const char *key); JABBERD2_API void xhash_zapx(xht h, const char *key, int len); JABBERD2_API void xhash_stat(xht h); JABBERD2_API void xhash_free(xht h); typedef void (*xhash_walker)(const char *key, int keylen, void *val, void *arg); JABBERD2_API void xhash_walk(xht h, xhash_walker w, void *arg); JABBERD2_API int xhash_dirty(xht h); JABBERD2_API int xhash_count(xht h); JABBERD2_API pool_t xhash_pool(xht h); /* iteration functions */ JABBERD2_API int xhash_iter_first(xht h); JABBERD2_API int xhash_iter_next(xht h); JABBERD2_API void xhash_iter_zap(xht h); JABBERD2_API int xhash_iter_get(xht h, const char **key, int *keylen, void **val);
首先是二进制哈希函数, 会根据一段内存算出哈希值.
/**
 * Generates a hash code for a byte string.
 *
 * This function uses the ELF hashing algorithm as reprinted in
 * Andrew Binstock, "Hashing Rehashed," Dr. Dobb's Journal, April 1996.
 *
 * @param s   first byte of the key; read for exactly `len` bytes
 *            (NUL bytes are hashed like any other byte)
 * @param len number of bytes to hash; values <= 0 hash nothing
 * @return    a non-negative hash value that fits in 28 bits
 */
static int _xhasher(const char *s, int len)
{
    /* ELF hash uses unsigned chars and unsigned arithmetic for portability */
    const unsigned char *bytes = (const unsigned char *)s;
    unsigned long h = 0;
    int i;

    for (i = 0; i < len; i++) {
        unsigned long top;

        h = (h << 4) + (unsigned long)bytes[i];
        /* fold bits 28..31 back into the low byte, then clear them */
        top = h & 0xF0000000UL;
        if (top != 0)
            h ^= top >> 24;
        h &= ~top;
    }

    /*
     * Where unsigned long is wider than 32 bits, carries out of bit 31 pile
     * up in the high part of h.  Keep only the 28 live bits so the conversion
     * to int is always in range instead of implementation-defined.
     */
    return (int)(h & 0x0FFFFFFFUL);
}
xhash_new创建了一个预分配内存的pool, 其尺寸足以容纳哈希桶数组以及哈希表自身. prime是哈希桶的个数, 从命名来看作者希望传入的prime是素数, 但这在实现上并不是必须的.
/**
 * Create a new hash table with `prime` buckets.
 *
 * The table header and the bucket array are both carved out of a single
 * pool sized for them up front.
 *
 * @param prime number of buckets; must be positive (the name suggests a
 *              prime number is intended, but nothing here requires it)
 * @return the new table, or NULL when `prime` is not positive
 */
xht xhash_new(int prime)
{
    xht table;
    pool_t p;

    /*
     * Every lookup computes `index % prime`; a bucket count of zero would
     * divide by zero and a negative one makes no sense, so refuse both.
     */
    if (prime <= 0)
        return NULL;

    /* log_debug(ZONE,"creating new hash table of size %d",prime); */

    /**
     * NOTE:
     * all xhash's memory should be allocated from the pool by using pmalloco()/pmallocx(),
     * so that the xhash_free() can just call pool_free() simply.
     */
    p = pool_heap(sizeof(_xhn) * prime + sizeof(_xht));

    /*
     * count and dirty are never set explicitly, and empty buckets are
     * recognised by key == NULL, so this relies on pmalloco() handing back
     * zeroed memory -- NOTE(review): confirm against the pool implementation.
     */
    table = pmalloco(p, sizeof(_xht));
    table->prime = prime;
    table->p = p;
    table->zen = pmalloco(p, sizeof(_xhn) * prime); /* array of xhn size of prime */

    table->free_list = NULL;

    /* iterator starts out "not positioned" */
    table->iter_bucket = -1;
    table->iter_node = NULL;

#ifdef XHASH_DEBUG
    table->stat = pmalloco(p, sizeof(int) * prime);
#else
    table->stat = NULL;
#endif

    return table;
}
释放xhash则直接释放pool即可, 这一点不必多说...
/**
 * Destroy a hash table by releasing the pool it was allocated from.
 * Passing NULL is a no-op.
 */
void xhash_free(xht h)
{
    /* log_debug(ZONE,"hash free %X",h); */

    if (h == NULL)
        return;

    /* want to do more things? Please see the note in xhash_new() first. */
    pool_free(h->p);
}
分配node采用如下方法. 实现上可能有一点迷惑性: 需要注意哈希桶是实实在在分配了内存的node数组, 而不是指针数组. 所以创建一个新的node时, 会先检查哈希桶里的那个静态node是否已被使用, 如果没有使用则直接返回它. 否则需要另外获取一个新的node: 优先从free_list中取, free_list为空则用pmalloco重新分配一个, 之后将该node插入到哈希桶的第一个结点之后(第一个结点是静态分配的).
另外, 哈希桶链表是双向的.
/**
 * Obtain a node for a new entry in the bucket selected by `index`.
 *
 * The bucket's inline head node is handed out when it is unused
 * (key == NULL).  Otherwise a node is taken from the free list, or freshly
 * allocated from the pool, and linked in right behind the head.
 * The entry count is incremented in either case.
 *
 * @param h     table to allocate in (must not be NULL)
 * @param index hash value of the key
 * @return      the node to fill in; key/keylen/val are left for the caller
 */
static xhn _xhash_node_new(xht h, int index)
{
    const int slot = index % h->prime;
    xhn head = &h->zen[slot];
    xhn node;

    /* track total */
    h->count++;

#ifdef XHASH_DEBUG
    h->stat[slot]++;
#endif

    /* an empty head slot can be used directly */
    if (head->key == NULL)
        return head;

    /* otherwise recycle a zapped node if there is one, else allocate */
    if (h->free_list != NULL) {
        node = h->free_list;
        h->free_list = node->next;
    } else {
        node = pmalloco(h->p, sizeof(_xhn));
    }

    /* splice the node in as the first element after the head */
    node->prev = head;
    node->next = head->next;
    if (node->next != NULL)
        node->next->prev = node;
    head->next = node;

    return node;
}
下面的函数给定哈希值index, 将会定位到特定的哈希桶里顺序查找给定的key, 特别注意到, n->key != NULL 的判断, 一方面哈希桶的第一个node用key = NULL来表示未被使用, 另一方面, 当删除一个正在被迭代器指向的node时, 为了不影响接下来的迭代, 也会令key=NULL来表示删除.
/**
 * Find the node holding `key` in the bucket selected by `index`.
 *
 * Nodes whose key is NULL (an unused head slot, or a node that was zapped
 * but is still linked) are skipped.
 *
 * Keys are compared byte for byte with memcmp() rather than as C strings,
 * to agree with _xhasher(), which hashes all `len` bytes including NULs.
 * With a string comparison, two different keys sharing a prefix up to an
 * embedded NUL would be treated as the same key.
 *
 * @param h     table to search (must not be NULL)
 * @param key   key bytes (must not be NULL)
 * @param len   number of key bytes; a negative length never matches
 * @param index hash value of the key
 * @return      the matching node, or NULL if there is none
 */
static xhn _xhash_node_get(xht h, const char *key, int len, int index)
{
    xhn n;
    int i = index % h->prime;

    /* a negative length cannot be turned into a byte count */
    if (len < 0)
        return NULL;

    for (n = &h->zen[i]; n != NULL; n = n->next) {
        if (n->key != NULL && n->keylen == len &&
            memcmp(key, n->key, (size_t)len) == 0)
            return n;
    }

    return NULL;
}
插入一个元素到哈希表, 采用如下接口: 先_xhasher计算出key的哈希值index, 之后_xhash_node_get查找该key是否已经存在,如果已存在则直接替换其中的内容即可返回.
如果不存在, 则分配一个node(从free_list 或者 pool 中), 赋值其中的内容即可.
两个接口的区别就是: 后者调用前者, 前者支持指定key的长度, 但实际上, 我发现这个哈希表只能支持字符串key, 因为_xhash_node_get里竟然用的是strncmp, 并且xhash_put里也是strlen计算的key长度.
/**
 * Store `val` under the first `len` bytes of `key`.
 *
 * An existing entry with the same key is overwritten in place (including
 * its key pointer); otherwise a new node is obtained for it.  The key is
 * stored by pointer, not copied.  Does nothing if `h` or `key` is NULL.
 */
void xhash_putx(xht h, const char *key, int len, void *val)
{
    xhn node;
    int hash;

    if (h == NULL || key == NULL)
        return;

    hash = _xhasher(key, len);

    /* dirty the xht */
    h->dirty++;

    /* reuse the node of an existing key, otherwise get a new one */
    node = _xhash_node_get(h, key, len, hash);
    if (node == NULL)
        node = _xhash_node_new(h, hash);

    /* log_debug(ZONE,"saving %s val %X",key,val); */
    node->key = key;
    node->keylen = len;
    node->val = val;
}

/**
 * Store `val` under the NUL-terminated string `key`.
 * Convenience wrapper around xhash_putx() using strlen(key) as the length.
 */
void xhash_put(xht h, const char *key, void *val)
{
    if (h != NULL && key != NULL)
        xhash_putx(h, key, strlen(key), val);
}
查询更加简单, 内部调用了上面的_xhash_node_get, 并做了一些参数校验.
/**
 * Look up the value stored under the first `len` bytes of `key`.
 *
 * @return the stored value, or NULL when `h` or `key` is NULL, `len` is not
 *         positive, or no entry matches
 */
void *xhash_getx(xht h, const char *key, int len)
{
    xhn hit;

    if (h == NULL || key == NULL || len <= 0)
        return NULL;

    hit = _xhash_node_get(h, key, len, _xhasher(key, len));

    /* log_debug(ZONE,"found %s returning %X",key,n->val); */
    return (hit != NULL) ? hit->val : NULL;
}

/**
 * Look up the value stored under the NUL-terminated string `key`.
 * Convenience wrapper around xhash_getx() using strlen(key) as the length.
 */
void *xhash_get(xht h, const char *key)
{
    if (h != NULL && key != NULL)
        return xhash_getx(h, key, strlen(key));

    return NULL;
}
删除一个指定的key: 后者调用前者, 主要是操纵双向链表, 并且需要照顾到迭代器是否指向了要删除的node.
如果要删除的node不是哈希桶的那个静态结点(不需要删除, key=NULL就可以表示删除了), 并且也不是当前迭代到的结点, 那么就移除并插到free_list头部.
对于哈希桶第一个静态Node与被迭代器指向的Node, 作者简单的令key=NULL表示删除, 仅此而已.
/**
 * Remove node `n` from the table.
 *
 * A bucket's inline head node and the node the iterator currently points at
 * are only marked empty (key and val set to NULL) and stay linked; any other
 * node is unlinked from its bucket and pushed onto the free list.
 *
 * @param h     table owning the node
 * @param n     node to remove
 * @param index hash value of the node's key (selects the bucket)
 */
void xhash_zap_inner(xht h, xhn n, int index)
{
    const int slot = index % h->prime;
    const int is_head = (n == &h->zen[slot]);
    const int is_cursor = (n == h->iter_node);

    if (!is_head && !is_cursor) {
        /* unlink from the bucket list */
        if (n->prev != NULL)
            n->prev->next = n->next;
        if (n->next != NULL)
            n->next->prev = n->prev;

        /* add it to the free_list head */
        n->prev = NULL;
        n->next = h->free_list;
        h->free_list = n;
    }

    /* empty the value */
    n->key = NULL;
    n->val = NULL;

    /* dirty the xht and track the total */
    h->dirty++;
    h->count--;

#ifdef XHASH_DEBUG
    h->stat[slot]--;
#endif
}

/**
 * Remove the entry stored under the first `len` bytes of `key`, if any.
 * Does nothing if `h` or `key` is NULL or the key is not present.
 */
void xhash_zapx(xht h, const char *key, int len)
{
    xhn victim;
    int hash;

    if (h == NULL || key == NULL)
        return;

    hash = _xhasher(key, len);
    victim = _xhash_node_get(h, key, len, hash);

    /* log_debug(ZONE,"zapping %s",key); */
    if (victim != NULL)
        xhash_zap_inner(h, victim, hash);
}
下面是一些比较杂的函数, 其中xhash_dirty返回的dirty值是在每次插入与删除node时+1的, 在这里还看不出它的具体用途.
/**
 * Return the dirty counter and reset it to zero.
 *
 * The counter is incremented by every put and zap, so a non-zero result
 * means the table changed since the previous call.  A NULL table reports 1.
 */
int xhash_dirty(xht h)
{
    int dirty;

    if (h == NULL)
        return 1;

    dirty = h->dirty;
    h->dirty = 0;
    return dirty;
}

/** Return the total number of entries in this xht (0 for a NULL table). */
int xhash_count(xht h)
{
    if (h == NULL)
        return 0;

    return h->count;
}

/**
 * Return the pool this table allocates from.
 *
 * Guards against a NULL table like the other accessors do.
 * NOTE(review): returning NULL assumes pool_t is a pointer type -- confirm
 * against the pool header.
 */
pool_t xhash_pool(xht h)
{
    if (h == NULL)
        return NULL;

    return h->p;
}
xhash提供了一个遍历哈希表的接口, 允许用户指定回调函数与自定义数据, 原理很简单:
/**
 * Call `w` once for every entry in the table.
 *
 * Buckets are visited in array order and nodes in list order.  Nodes whose
 * key or value is NULL are skipped.  Does nothing if `h` or `w` is NULL.
 *
 * @param h   table to traverse
 * @param w   callback receiving key, key length, value and `arg`
 * @param arg opaque pointer passed through to `w`
 */
void xhash_walk(xht h, xhash_walker w, void *arg)
{
    int bucket;

    if (h == NULL || w == NULL)
        return;

    /* log_debug(ZONE,"walking %X",h); */

    for (bucket = 0; bucket < h->prime; bucket++) {
        xhn cur = &h->zen[bucket];

        while (cur != NULL) {
            if (cur->key != NULL && cur->val != NULL)
                w(cur->key, cur->keylen, cur->val, arg);
            cur = cur->next;
        }
    }
}
剩下的是迭代器: 迭代器一方面提供了传统的迭代访问元素的方式, 另一方面也会在迭代的过程中回收那些key=NULL或者val=NULL的非桶首node. 这些node通常是因为之前zap删除了迭代器正指向的node而留在链表里的, 它们会在后续的迭代过程中被回收到free_list中.
初始化迭代器: 令iter_bucket和iter_node为初始化状态, 前者表示当前迭代哪个桶, 后者表示迭代哪个结点. 最后会调用xhash_iter_next将迭代器挪到第一个Node.
/**
 * Start an iteration: reset the iterator and move it to the first entry.
 *
 * @return the result of xhash_iter_next() after the reset, or 0 when `h`
 *         is NULL
 */
int xhash_iter_first(xht h)
{
    if (h != NULL) {
        /* "not positioned" state; xhash_iter_next() then starts at bucket 0 */
        h->iter_bucket = -1;
        h->iter_node = NULL;

        return xhash_iter_next(h);
    }

    return 0;
}
令迭代器前进: 先让迭代node(iter_node)指向当前桶内的下一个node. 如果当前桶内还有剩余node, 则进入一个while循环: 遇到key!=NULL&&val!=NULL的node就返回; 否则说明它可能是之前被半删除的node, 这里会将它回收到free_list中, 然后继续向后查找. 如果node为空, 说明当前的桶内没有node了, 必须迭代下一个哈希桶, 并在新的桶内找到一个key!=NULL&&val!=NULL的node.
注:此处可以看出为什么初始化迭代器设置iter_node =NULL, iter_bucket= -1的原因.
/**
 * Advance the iterator to the next entry.
 *
 * First the rest of the current bucket is searched.  Nodes met on the way
 * whose key or value is NULL are unlinked and moved to the free list (the
 * bucket's inline head node is never unlinked).  If the bucket is exhausted,
 * the following buckets are scanned for the first node with both a key and
 * a value.
 *
 * @return 1 if the iterator now points at an entry; 0 if `h` is NULL or no
 *         entry is left, in which case the iterator is reset
 */
int xhash_iter_next(xht h)
{
    xhn cur;

    if (h == NULL)
        return 0;

    /* next in this bucket */
    cur = (h->iter_node != NULL) ? h->iter_node->next : NULL;
    h->iter_node = cur;

    while (cur != NULL) {
        if (cur->key != NULL && cur->val != NULL)
            return 1;

        /* step past the dead node before touching its links */
        h->iter_node = cur->next;

        if (cur != &h->zen[h->iter_bucket]) {
            if (cur->prev != NULL)
                cur->prev->next = cur->next;
            if (cur->next != NULL)
                cur->next->prev = cur->prev;

            /* add it to the free_list head */
            cur->prev = NULL;
            cur->next = h->free_list;
            h->free_list = cur;
        }

        cur = h->iter_node;
    }

    /* next bucket */
    h->iter_bucket++;
    while (h->iter_bucket < h->prime) {
        for (cur = &h->zen[h->iter_bucket]; cur != NULL; cur = cur->next) {
            if (cur->key != NULL && cur->val != NULL) {
                h->iter_node = cur;
                return 1;
            }
        }
        h->iter_bucket++;
    }

    /* there is no next */
    h->iter_bucket = -1;
    h->iter_node = NULL;

    return 0;
}
剩下的是删除迭代器指向的node: 此处不会真正地摘除该node, 只会令key=NULL, 等到之后的迭代过程中被发现时再回收到free_list. 之所以不立即摘除, 是因为那样会影响接下来的迭代. 作者并不是不能把迭代器的删除实现得更直接, 而是遵循了与C++ map迭代器类似的原则: 将正确使用迭代器的责任交给使用者.
/**
 * Remove the entry the iterator currently points at.
 *
 * The node is handed to xhash_zap_inner(), which leaves the iterator's
 * current node linked and only marks it empty, so the iteration can
 * continue from it.
 *
 * Does nothing if `h` is NULL, the iterator is not positioned, or the
 * current node has already been zapped.
 */
void xhash_iter_zap(xht h)
{
    xhn cur;

    if (h == NULL || h->iter_node == NULL)
        return;

    cur = h->iter_node;

    /*
     * A node zapped earlier (by a previous xhash_iter_zap() or by key) has
     * key == NULL but keeps its old keylen.  Hashing it would read through
     * the NULL key, and zapping it again would decrement the entry count a
     * second time, so stop here.
     */
    if (cur->key == NULL)
        return;

    xhash_zap_inner(h, cur, _xhasher(cur->key, cur->keylen));
}
最后一个接口, 允许用户获取当前迭代器指向node的key和val, 传入的都是指针的地址: 这里严格判断, 一个空的容器的迭代器永远iter_node = NULL, 所以要判断仔细.
下面就是把用户需要的内容返回给用户.
/**
 * Fetch the key and/or value of the entry the iterator points at.
 *
 * At least one of `key` and `val` must be requested, and `keylen` is
 * required whenever `key` is.
 *
 * @param h      table being iterated
 * @param key    receives the key pointer, may be NULL if not wanted
 * @param keylen receives the key length; must be non-NULL when `key` is
 * @param val    receives the value, may be NULL if not wanted
 * @return 1 when the outputs were filled from the current node; 0 on bad
 *         arguments, or when the iterator is not positioned (in which case
 *         the requested `*key` / `*val` are set to NULL and `*keylen` is
 *         left untouched)
 */
int xhash_iter_get(xht h, const char **key, int *keylen, void **val)
{
    xhn cur;

    if (h == NULL)
        return 0;
    if (key == NULL && val == NULL)
        return 0;
    if (key != NULL && keylen == NULL)
        return 0;

    cur = h->iter_node;

    if (cur == NULL) {
        if (key != NULL)
            *key = NULL;
        if (val != NULL)
            *val = NULL;
        return 0;
    }

    if (key != NULL) {
        *key = cur->key;
        *keylen = cur->keylen;
    }
    if (val != NULL)
        *val = cur->val;

    return 1;
}
在xhash的插入操作中, 并没有看到预想的pstrdup(key)的操作, 作者将key的副本生成的责任交给了用户自己, 而xhash内部的pool只负责分配xht, xhn的内存.