【Jabberd2源码剖析系列 xhash】

xhash是jabberd2的哈希表, 并提供了迭代器用于遍历xhash.

 

解释一下结构体的命名: xht_struct意思是x hash table, xhn_struct意思是x hash node, 这样方便理解记忆.

xhn_struct的成员变量顾名思义, 不赘述.

xht_struct中, p是内存池, 负责node的分配等, zen是桶数组, free_list是回收的node内存, iter_bucket和iter_node被用于记录迭代器的位置.

/*
 * One hash-table node. Nodes live either in a bucket chain (doubly linked
 * through next/prev) or on the table's free list (singly linked through next).
 */
typedef struct xhn_struct

{

    /* next node in the bucket chain, or next recycled node on the free list */
    struct xhn_struct *next;

    /* previous node in the bucket chain; set to NULL when the node is recycled */
    struct xhn_struct *prev;

    /* caller-owned key pointer (stored as-is, never copied); NULL marks an unused or deleted node */
    const char *key;

    /* key length as supplied by the caller at insertion time */
    int keylen;

    /* caller-owned value pointer; cleared to NULL when the node is deleted */
    void *val;

} *xhn, _xhn;



/*
 * The hash table itself. The table header, the bucket array and every
 * overflow node are allocated from the pool `p`.
 */
typedef struct xht_struct

{

    /* pool that backs the table; freeing it releases the whole table */
    pool_t p;

    /* number of buckets (hash codes are reduced modulo this value) */
    int prime;

    /* incremented on every put and zap; read and reset by xhash_dirty() */
    int dirty;

    /* incremented when a node is handed out, decremented when one is zapped */
    int count;

    /* array of `prime` bucket-head nodes stored inline (not an array of pointers) */
    struct xhn_struct *zen;

    struct xhn_struct *free_list; // list of zapped nodes available for reuse.

    /* iterator position: current bucket index, -1 when no iteration is active */
    int iter_bucket;

    /* iterator position: current node, NULL when no iteration is active */
    xhn iter_node;

    /* per-bucket entry counters; only allocated when XHASH_DEBUG is defined, otherwise NULL */
    int *stat;

} *xht, _xht;



/* Create a table with `prime` buckets, backed by its own memory pool. */
JABBERD2_API xht xhash_new(int prime);

/* Insert or replace the entry for a NUL-terminated key. The key pointer is stored, not copied. */
JABBERD2_API void xhash_put(xht h, const char *key, void *val);

/* Same as xhash_put() but with an explicit key length. */
JABBERD2_API void xhash_putx(xht h, const char *key, int len, void *val);

/* Look up a NUL-terminated key; returns the stored value or NULL when absent. */
JABBERD2_API void *xhash_get(xht h, const char *key);

/* Look up a key of explicit length; returns NULL when absent or when len <= 0. */
JABBERD2_API void *xhash_getx(xht h, const char *key, int len);

/* NOTE(review): definition not visible here; presumably the NUL-terminated form of xhash_zapx(). */
JABBERD2_API void xhash_zap(xht h, const char *key);

/* Delete the entry for a key of explicit length, if present. */
JABBERD2_API void xhash_zapx(xht h, const char *key, int len);

/* NOTE(review): definition not visible here; likely reports the XHASH_DEBUG per-bucket statistics. */
JABBERD2_API void xhash_stat(xht h);

/* Release the table by freeing its pool; a NULL table is ignored. */
JABBERD2_API void xhash_free(xht h);

/* Callback type for xhash_walk(): receives key, key length, value and the caller's `arg`. */
typedef void (*xhash_walker)(const char *key, int keylen, void *val, void *arg);

/* Invoke `w` for every entry whose key and value are both non-NULL. */
JABBERD2_API void xhash_walk(xht h, xhash_walker w, void *arg);

/* Return the dirty counter and reset it to zero; returns 1 for a NULL table. */
JABBERD2_API int xhash_dirty(xht h);

/* Return the number of entries; 0 for a NULL table. */
JABBERD2_API int xhash_count(xht h);

/* Return the pool that backs the table. */
JABBERD2_API pool_t xhash_pool(xht h);



/* Iteration: the cursor is kept inside the table, so one iteration per table at a time. */

/* Reset the cursor and move to the first entry; returns 1 if there is one, else 0. */
JABBERD2_API int xhash_iter_first(xht h);

/* Advance the cursor; returns 1 if it now points at an entry, else 0. */
JABBERD2_API int xhash_iter_next(xht h);

/* Delete the entry the cursor currently points at. */
JABBERD2_API void xhash_iter_zap(xht h);

/* Fetch key/keylen and/or value at the cursor; returns 1 on success, 0 otherwise. */
JABBERD2_API int xhash_iter_get(xht h, const char **key, int *keylen, void **val);

首先是二进制哈希函数, 会根据一段内存算出哈希值.

/*
 * Compute a hash code over the first `len` bytes of `s`.
 *
 * This is the ELF hashing algorithm as reprinted in
 * Andrew Binstock, "Hashing Rehashed," Dr. Dobb's Journal, April 1996.
 *
 * A `len` of zero or less yields 0 without touching `s`.
 */
static int _xhasher(const char *s, int len)
{
    /* work on unsigned bytes so the result does not depend on char signedness */
    const unsigned char *bytes = (const unsigned char *)s;
    unsigned long hash = 0;
    unsigned long top;
    int pos = 0;

    while (pos < len) {
        hash = (hash << 4) + (unsigned long)bytes[pos];

        /* fold the top nibble back into the low bits, then clear it */
        top = hash & 0xF0000000UL;
        if (top != 0)
            hash ^= (top >> 24);
        hash &= ~top;

        pos++;
    }

    return (int)hash;
}

xhash_new创建了一个预分配内存的pool, 其初始尺寸足以容纳哈希表自身以及哈希桶数组. prime是哈希桶的个数, 从命名来看作者希望传入的prime是素数, 但这在实现上来说并不是必须的.

/**
 * Create a new hash table.
 *
 * @param prime number of buckets. Values below 1 are clamped to 1, because a
 *              zero bucket count would make every `index % h->prime` in the
 *              lookup/insert paths undefined and a negative one would request
 *              a wrapped-around allocation size.
 * @return the new table; release it with xhash_free().
 *
 * The pool is created with an initial size covering the table header plus the
 * inline bucket array.
 *
 * NOTE(review): `dirty`, `count` and the bucket nodes are never initialised
 * explicitly here, so this relies on pmalloco() returning zero-filled memory.
 * Confirm against the pool implementation.
 */
xht xhash_new(int prime)
{
    xht xnew;
    pool_t p;

    if (prime < 1)
        prime = 1;

    /**
     * NOTE:
     * all xhash's memory should be allocated from the pool by using pmalloco()/pmallocx(),
     * so that the xhash_free() can just call pool_free() simply.
     */
    p = pool_heap(sizeof(_xhn) * prime + sizeof(_xht));
    xnew = pmalloco(p, sizeof(_xht));
    xnew->prime = prime;
    xnew->p = p;
    xnew->zen = pmalloco(p, sizeof(_xhn) * prime); /* array of xhn size of prime */

    xnew->free_list = NULL;

    /* no iteration in progress */
    xnew->iter_bucket = -1;
    xnew->iter_node = NULL;

#ifdef XHASH_DEBUG
    xnew->stat = pmalloco(p, sizeof(int) * prime);
#else
    xnew->stat = NULL;
#endif

    return xnew;
}

释放xhash则直接释放pool即可, 这一点不必多说...

/**
 * Destroy a hash table.
 *
 * Everything belonging to the table is allocated from its pool (see the note
 * in xhash_new()), so freeing the pool is all that is needed.
 * A NULL table is ignored.
 */
void xhash_free(xht h)
{
    if (h == NULL)
        return;

    pool_free(h->p);
}

分配node采用的如下方法: 在实现上可能有一点迷惑性, 需要注意到哈希桶是实实在在分配了内存的数组, 而不是指针数组, 所以创建一个新的node时, 会先检查哈希桶那个Node是否被使用了, 如果没有使用则直接返回给用户使用. 否则, 需要另外获取一个新的Node, 此时优先检查free_list, 没有free_list则pmalloc重新分配一个node, 之后将该node插入到哈希桶的第一个结点之后(第一个结点是静态分配的). 

另外, 哈希桶链表是双向的.

/*
 * Hand out a node for a new entry in the bucket selected by `index`.
 *
 * The bucket head is stored inline in h->zen; if it is unused (key == NULL)
 * it is returned directly. Otherwise a node is taken from the free list, or
 * freshly allocated from the pool, and linked in right after the bucket head.
 *
 * The caller is expected to fill in key, keylen and val.
 */
static xhn _xhash_node_new(xht h, int index)
{
    int bucket = index % h->prime;
    xhn head = &h->zen[bucket];
    xhn node;

    /* track total */
    h->count++;

#ifdef XHASH_DEBUG
    h->stat[bucket]++;
#endif

    /* an empty inline head can be used as-is */
    if (head->key == NULL)
        return head;

    /* prefer a recycled node over a new pool allocation */
    node = h->free_list;
    if (node != NULL)
        h->free_list = node->next;
    else
        node = pmalloco(h->p, sizeof(_xhn));

    /* splice the node in immediately after the bucket head */
    node->prev = head;
    node->next = head->next;
    if (node->next != NULL)
        node->next->prev = node;
    head->next = node;

    return node;
}

下面的函数给定哈希值index, 将会定位到特定的哈希桶里顺序查找给定的key, 特别注意到, n->key != NULL 的判断, 一方面哈希桶的第一个node用key = NULL来表示未被使用, 另一方面, 当删除一个正在被迭代器指向的node时, 为了不影响接下来的迭代, 也会令key=NULL来表示删除.

/*
 * Find the node holding `key` (of length `len`) in the bucket selected by
 * `index`. Returns the node, or NULL when no live node matches.
 *
 * Nodes whose key is NULL are skipped: that marks either an unused inline
 * bucket head or a node that was zapped but is still linked.
 *
 * Keys are compared with memcmp() over exactly `len` bytes. The hasher already
 * consumes all `len` bytes, so a strncmp() comparison (which stops at the
 * first NUL) would treat distinct length-delimited keys that share a prefix up
 * to an embedded NUL as equal.
 */
static xhn _xhash_node_get(xht h, const char *key, int len, int index)
{
    xhn n;
    int i = index % h->prime;

    for (n = &h->zen[i]; n != NULL; n = n->next) {
        if (n->key == NULL || n->keylen != len)
            continue;

        if (len < 0) {
            /* legacy path: a negative length cannot be handed to memcmp();
             * keep the historical strncmp() behaviour for such callers */
            if (strncmp(key, n->key, len) == 0)
                return n;
        } else if (memcmp(key, n->key, (size_t)len) == 0) {
            return n;
        }
    }

    return NULL;
}

 插入一个元素到哈希表, 采用如下接口: 先_xhasher计算出key的哈希值index, 之后_xhash_node_get查找该key是否已经存在,如果已存在则直接替换其中的内容即可返回.

如果不存在, 则分配一个node(从free_list 或者 pool 中), 赋值其中的内容即可. 

两个接口的区别就是: 后者调用前者, 前者支持指定key的长度, 但实际上, 我发现这个哈希表只能支持字符串key, 因为_xhash_node_get里竟然用的是strncmp, 并且xhash_put里也是strlen计算的key长度.

/**
 * Insert or replace the entry for `key` (explicit length `len`).
 *
 * The key pointer is stored as given; the table does not copy the key, so the
 * caller must keep it alive. A NULL table or NULL key is ignored.
 * If the key already exists, its node is reused and key, keylen and val are
 * overwritten.
 */
void xhash_putx(xht h, const char *key, int len, void *val)
{
    int hash;
    xhn node;

    if (h == NULL || key == NULL)
        return;

    hash = _xhasher(key, len);

    /* dirty the xht */
    h->dirty++;

    /* reuse the existing node for this key, otherwise obtain a new one */
    node = _xhash_node_get(h, key, len, hash);
    if (node == NULL)
        node = _xhash_node_new(h, hash);

    node->key = key;
    node->keylen = len;
    node->val = val;
}



/**
 * Insert or replace the entry for a NUL-terminated `key`.
 *
 * Thin wrapper around xhash_putx() that measures the key with strlen().
 * A NULL table or NULL key is ignored.
 */
void xhash_put(xht h, const char *key, void *val)
{
    if (h != NULL && key != NULL)
        xhash_putx(h, key, strlen(key), val);
}

查询更加简单, 内部调用了上面的_xhash_node_get, 并做了一些参数校验.

/**
 * Look up `key` (explicit length `len`).
 *
 * @return the stored value, or NULL when the table or key is NULL, when
 *         len <= 0, or when no entry matches.
 */
void *xhash_getx(xht h, const char *key, int len)
{
    xhn hit;

    if (h == NULL || key == NULL || len <= 0)
        return NULL;

    hit = _xhash_node_get(h, key, len, _xhasher(key, len));

    return (hit != NULL) ? hit->val : NULL;
}



/**
 * Look up a NUL-terminated `key`.
 *
 * Thin wrapper around xhash_getx() that measures the key with strlen().
 * Returns NULL for a NULL table or NULL key.
 */
void *xhash_get(xht h, const char *key)
{
    if (h != NULL && key != NULL)
        return xhash_getx(h, key, strlen(key));

    return NULL;
}

删除一个指定的key: 后者调用前者, 主要是操纵双向链表, 并且需要照顾到迭代器是否指向了要删除的node.

如果要删除的node不是哈希桶的那个静态结点(不需要删除, key=NULL就可以表示删除了), 并且也不是当前迭代到的结点, 那么就移除并插到free_list头部. 

对于哈希桶第一个静态Node与被迭代器指向的Node, 作者简单的令key=NULL表示删除, 仅此而已.

/*
 * Delete node `n`, which lives in the bucket selected by `index`.
 *
 * Two kinds of node are only marked deleted (key/val cleared) and left where
 * they are: the inline bucket head, which is part of the bucket array, and the
 * node the iterator currently points at, so that iteration can still follow
 * its `next` link. Any other node is unlinked from its chain and pushed onto
 * the free list.
 */
void xhash_zap_inner( xht h, xhn n, int index)
{
    int bucket = index % h->prime;
    int is_head = (n == &h->zen[bucket]);
    int is_cursor = (n == h->iter_node);

    if (!is_head && !is_cursor) {
        xhn before = n->prev;
        xhn after = n->next;

        /* unlink from the bucket chain */
        if (before != NULL)
            before->next = after;
        if (after != NULL)
            after->prev = before;

        /* push onto the head of the free list */
        n->prev = NULL;
        n->next = h->free_list;
        h->free_list = n;
    }

    /* mark the node as empty */
    n->key = NULL;
    n->val = NULL;

    /* dirty the xht and track the total */
    h->dirty++;
    h->count--;

#ifdef XHASH_DEBUG
    h->stat[bucket]--;
#endif
}



/**
 * Delete the entry for `key` (explicit length `len`), if there is one.
 *
 * A NULL table, a NULL key or a key that is not present is silently ignored.
 */
void xhash_zapx(xht h, const char *key, int len)
{
    int hash;
    xhn victim;

    if (h == NULL || key == NULL)
        return;

    hash = _xhasher(key, len);
    victim = _xhash_node_get(h, key, len, hash);

    if (victim != NULL)
        xhash_zap_inner(h, victim, hash);
}

下面是一些比较杂的函数, 其中xhash_dirty返回的dirty值是在每次插入与删除node时+1的, 在这里还看不出它的具体用途.

/**
 * Return the dirty counter and reset it to zero.
 *
 * The counter is bumped by every put and zap, so a non-zero result means the
 * table changed since the previous call. A NULL table reports 1.
 */
int xhash_dirty(xht h)
{
    int previous;

    if (h == NULL)
        return 1;

    previous = h->dirty;
    h->dirty = 0;

    return previous;
}



/**
 * Return the number of entries currently tracked by the table
 * (0 for a NULL table).
 */
int xhash_count(xht h)
{
    return (h != NULL) ? h->count : 0;
}



/**
 * Return the pool that backs this table.
 *
 * Returns NULL for a NULL table, matching the NULL-tolerance of the other
 * accessors (xhash_count(), xhash_dirty()) instead of dereferencing it.
 *
 * NOTE(review): assumes pool_t is a pointer type, so NULL is a valid return
 * value. Confirm against the pool header.
 */
pool_t xhash_pool(xht h)
{
    if (h == NULL)
        return NULL;

    return h->p;
}

xhash提供了一个遍历哈希表的接口, 允许用户指定回调函数与自定义数据, 原理很简单:

/**
 * Visit every entry of the table.
 *
 * Buckets are visited in index order and each chain from its head onwards.
 * `w` is called with (key, keylen, val, arg) for every node whose key and
 * value are both non-NULL. A NULL table or NULL callback is ignored.
 */
void xhash_walk(xht h, xhash_walker w, void *arg)
{
    int bucket;

    if (h == NULL || w == NULL)
        return;

    for (bucket = 0; bucket < h->prime; bucket++) {
        xhn cur = &h->zen[bucket];

        while (cur != NULL) {
            if (cur->key != NULL && cur->val != NULL)
                (*w)(cur->key, cur->keylen, cur->val, arg);

            /* read the link only after the callback has run */
            cur = cur->next;
        }
    }
}

剩下的是迭代器: 迭代器一方面提供了传统的迭代访问元素的方式, 另一方面其内部也在迭代的过程中回收了那些Key=NULL 或者val=NULL的正常node, 这些node是因为上一次迭代过程中zap删除迭代器指向的node引起的, 在这次迭代过程中将被回收到free_list中.

初始化迭代器: 令iter_bucket和iter_node为初始化状态, 前者表示当前迭代哪个桶, 后者表示迭代哪个结点. 最后会调用xhash_iter_next将迭代器挪到第一个Node.

/**
 * Start an iteration.
 *
 * Resets the cursor to its "before the first bucket" state and lets
 * xhash_iter_next() move it onto the first entry.
 *
 * @return 1 if the cursor now points at an entry, 0 if the table is NULL or
 *         has nothing to iterate.
 */
int xhash_iter_first(xht h) {
    if (h != NULL) {
        h->iter_bucket = -1;
        h->iter_node = NULL;

        return xhash_iter_next(h);
    }

    return 0;
}

令迭代器前进: 先让迭代node(iter_node)指向当前桶内的下一个node. 如果当前桶内还有剩余node, 则进入while循环: 遇到一个key!=NULL&&val!=NULL的node就返回; 否则说明该node是之前被半删除的node, 这里会将它回收到free_list中, 然后继续向后查找. 如果node为空, 说明当前的桶内没有node了, 必须迭代下一个哈希桶, 并在新的桶内找到一个key!=NULL&&val!=NULL的node.

注:此处可以看出为什么初始化迭代器设置iter_node =NULL, iter_bucket= -1的原因.

/*
 * Advance the table's iterator to the next entry whose key and value are both
 * non-NULL.
 *
 * Returns 1 when the iterator points at such an entry. Returns 0 when h is
 * NULL or when no further entry exists; in the latter case the iterator is
 * reset to iter_bucket = -1, iter_node = NULL.
 *
 * Phase 1 continues inside the current bucket and recycles dead nodes it
 * passes over; phase 2 scans the remaining buckets from their heads.
 */
int xhash_iter_next(xht h) {

    if(h == NULL) return 0;



    /* phase 1: continue in the current bucket, starting after the current node
     * (skipped entirely right after xhash_iter_first(), where iter_node is NULL) */

    h->iter_node = h->iter_node ? h->iter_node->next : NULL;

    while(h->iter_node != NULL) {

        xhn n = h->iter_node;



        /* live entry found: stop here */
        if(n->key != NULL && n->val != NULL)

            return 1;



        /* step past n before n->next is overwritten by the free-list push below */
        h->iter_node = n->next;



        /* n failed the key/val test: unlink it and recycle it, unless it is the inline bucket head.
         * NOTE(review): the test above also rejects nodes with a live key but a NULL val; those are
         * unlinked here as well, and h->count is not adjusted. Confirm whether NULL values are ever stored. */
        if (n != &h->zen[h->iter_bucket]) {

            if(n->prev) n->prev->next = n->next;

            if(n->next) n->next->prev = n->prev;



            // push it onto the head of the free list.

            n->prev = NULL;

            n->next = h->free_list;

            h->free_list = n;

        }

    }



    /* phase 2: move on to the following buckets (from -1 to 0 on a fresh iteration);
     * nodes skipped in this scan are left linked, not recycled */

    for(h->iter_bucket++; h->iter_bucket < h->prime; h->iter_bucket++) {

        h->iter_node = &h->zen[h->iter_bucket];



        while(h->iter_node != NULL) {

            if(h->iter_node->key != NULL && h->iter_node->val != NULL)

                return 1;



            h->iter_node = h->iter_node->next;

        }

    }



    /* there is no next: reset the iterator to its idle state */

    h->iter_bucket = -1;

    h->iter_node = NULL;



    return 0;

}  

剩下的是删除迭代器指向的Node: 此处不会真正地删除该Node, 只会令key=NULL, 并在之后的迭代过程中被发现并回收到free_list. 之所以不立即删除, 是因为那样会影响接下来的迭代. 作者并不是不能把迭代器的删除实现得更直接, 而是遵循了与C++ map迭代器类似的原则: 将正确操作迭代器的责任交给使用者.

/**
 * Delete the entry the iterator currently points at.
 *
 * The node stays linked in its bucket chain (xhash_zap_inner() only clears
 * key/val for the iterator's current node), so xhash_iter_next() can still
 * follow its `next` link afterwards.
 *
 * Does nothing when the table is NULL, when no iteration is positioned on a
 * node, or when the current node has already been zapped. Without that last
 * check a second call would pass the cleared (NULL) key to the hasher and
 * decrement the entry count twice.
 */
void xhash_iter_zap(xht h)
{
    int index;

    if( !h || !h->iter_node ) return;

    /* already deleted: key was cleared by a previous zap of this node */
    if( h->iter_node->key == NULL ) return;

    index = _xhasher( h->iter_node->key, h->iter_node->keylen );

    xhash_zap_inner( h, h->iter_node, index );
}

最后一个接口, 允许用户获取当前迭代器指向node的key和val, 传入的都是指针的地址. 这里需要仔细判断: 空容器(或者已经迭代结束)的迭代器iter_node始终为NULL, 此时函数会把输出参数置空并返回0.

下面就是把用户需要的内容返回给用户.

/**
 * Read the entry the iterator currently points at.
 *
 * @param key    out: key pointer; may be NULL if only the value is wanted.
 * @param keylen out: key length; required whenever `key` is non-NULL.
 * @param val    out: value pointer; may be NULL if only the key is wanted.
 * @return 1 when the outputs were filled from the current node; 0 when the
 *         arguments are unusable (NULL table, neither key nor val requested,
 *         key requested without keylen) or when the iterator is not positioned
 *         on a node.
 *
 * When the iterator is not positioned on a node, every requested output is
 * reset (*key = NULL, *keylen = 0, *val = NULL) so that no output is left
 * indeterminate on the failure path.
 */
int xhash_iter_get(xht h, const char **key, int *keylen, void **val) {
    if(h == NULL || (key == NULL && val == NULL) || (key != NULL && keylen == NULL)) return 0;

    if(h->iter_node == NULL) {
        if(key != NULL) {
            *key = NULL;
            *keylen = 0; /* keylen is guaranteed non-NULL here by the argument check above */
        }
        if(val != NULL) *val = NULL;
        return 0;
    }

    if(key != NULL) {
        *key = h->iter_node->key;
        *keylen = h->iter_node->keylen;
    }
    if(val != NULL) *val = h->iter_node->val;

    return 1;
}

 

在xhash的插入操作中, 并没有看到预想的pstrdup(key)的操作, 作者将key的副本生成的责任交给了用户自己, 而xhash内部的pool只负责分配xht, xhn的内存.

你可能感兴趣的:(hash)