前面的讲述了如何用链地址法实现一个哈希表,那么今天来分析一下另一种解决哈希冲突的做法,即为每个Hash值,建立一个Hash桶(Bucket),桶的容量是固定的,也就是只能处理固定次数的冲突,如1048576个Hash桶,每个桶中有4个表项(Entry),总计4M个表项。其实这两种的实现思路雷同,就是对Hash表中每个Hash值建立一个冲突表,即将冲突的几个记录以表的形式存储在其中。
大致的思路是这样的:
首先哈希桶的个数是固定的,有用户构建的时候输入,一旦构建,个数就已经固定;查找的时候首先将key值通过哈希函数获取哈希值,根据哈希值获取到对应的哈希桶,然后遍历哈希桶内的pairs数组获取。
主要的数据结构:
[cpp] view plain
copy
print ?
- struct Pair {
- char *key;
- char *value;
- };
-
- struct Bucket {
- unsigned int count;
- Pair *pairs;
- };
-
- struct StrMap {
- unsigned int count;
- Bucket *buckets;
- };
struct Pair {
char *key;
char *value;
};
struct Bucket {
unsigned int count;
Pair *pairs;
};
struct StrMap {
unsigned int count;
Bucket *buckets;
};
本小节主要是学习一下国外大牛是如何实现哈希表的。完整的代码,请看: 这里
,一位圣安德鲁斯大学的讲师: KRISTENSSON
博客
strmap.h文件
[java] view plain
copy
print ?
- #ifndef _STRMAP_H_
- #define _STRMAP_H_
-
- #ifdef __cplusplus
- extern "C"
- {
- #endif
-
- #include <stdlib.h>
- #include <string.h>
-
- typedef struct StrMap StrMap;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- typedef void(*sm_enum_func)(const char *key, const char *value, const void *obj);
-
-
-
-
-
-
-
-
-
-
-
-
- StrMap * sm_new(unsigned int capacity);
-
-
-
-
-
-
-
-
-
-
-
-
- void sm_delete(StrMap *map);
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- int sm_get(const StrMap *map, const char *key, char *out_buf, unsigned int n_out_buf);
-
-
-
-
-
-
-
-
-
-
-
-
-
- int sm_exists(const StrMap *map, const char *key);
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- int sm_put(StrMap *map, const char *key, const char *value);
-
-
-
-
-
-
-
-
-
-
- int sm_get_count(const StrMap *map);
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- int sm_enum(const StrMap *map, sm_enum_func enum_func, const void *obj);
-
- #ifdef __cplusplus
- }
- #endif
-
- #endif
#ifndef _STRMAP_H_
#define _STRMAP_H_
#ifdef __cplusplus
extern "C"
{
#endif
#include <stdlib.h>
#include <string.h>
typedef struct StrMap StrMap;
/*
* This callback function is called once per key-value when iterating over
* all keys associated to values.
*
* Parameters:
*
* key: A pointer to a null-terminated C string. The string must not
* be modified by the client.
*
* value: A pointer to a null-terminated C string. The string must
* not be modified by the client.
*
* obj: A pointer to a client-specific object. This parameter may be
* null.
*
* Return value: None.
*/
typedef void(*sm_enum_func)(const char *key, const char *value, const void *obj);
/*
* Creates a string map.
*
* Parameters:
*
* capacity: The number of top-level slots this string map
* should allocate. This parameter must be > 0.
*
* Return value: A pointer to a string map object,
* or null if a new string map could not be allocated.
*/
StrMap * sm_new(unsigned int capacity);
/*
* Releases all memory held by a string map object.
*
* Parameters:
*
* map: A pointer to a string map. This parameter cannot be null.
* If the supplied string map has been previously released, the
* behaviour of this function is undefined.
*
* Return value: None.
*/
void sm_delete(StrMap *map);
/*
* Returns the value associated with the supplied key.
*
* Parameters:
*
* map: A pointer to a string map. This parameter cannot be null.
*
* key: A pointer to a null-terminated C string. This parameter cannot
* be null.
*
* out_buf: A pointer to an output buffer which will contain the value,
* if it exists and fits into the buffer.
*
* n_out_buf: The size of the output buffer in bytes.
*
* Return value: If out_buf is set to null and n_out_buf is set to 0 the return
* value will be the number of bytes required to store the value (if it exists)
* and its null-terminator. For all other parameter configurations the return value
* is 1 if an associated value was found and completely copied into the output buffer,
* 0 otherwise.
*/
int sm_get(const StrMap *map, const char *key, char *out_buf, unsigned int n_out_buf);
/*
* Queries the existence of a key.
*
* Parameters:
*
* map: A pointer to a string map. This parameter cannot be null.
*
* key: A pointer to a null-terminated C string. This parameter cannot
* be null.
*
* Return value: 1 if the key exists, 0 otherwise.
*/
int sm_exists(const StrMap *map, const char *key);
/*
* Associates a value with the supplied key. If the key is already
* associated with a value, the previous value is replaced.
*
* Parameters:
*
* map: A pointer to a string map. This parameter cannot be null.
*
* key: A pointer to a null-terminated C string. This parameter
* cannot be null. The string must have a string length > 0. The
* string will be copied.
*
* value: A pointer to a null-terminated C string. This parameter
* cannot be null. The string must have a string length > 0. The
* string will be copied.
*
* Return value: 1 if the association succeeded, 0 otherwise.
*/
int sm_put(StrMap *map, const char *key, const char *value);
/*
* Returns the number of associations between keys and values.
*
* Parameters:
*
* map: A pointer to a string map. This parameter cannot be null.
*
* Return value: The number of associations between keys and values.
*/
int sm_get_count(const StrMap *map);
/*
* An enumerator over all associations between keys and values.
*
* Parameters:
*
* map: A pointer to a string map. This parameter cannot be null.
*
* enum_func: A pointer to a callback function that will be
* called by this procedure once for every key associated
* with a value. This parameter cannot be null.
*
* obj: A pointer to a client-specific object. This parameter will be
* passed back to the client's callback function. This parameter can
* be null.
*
* Return value: 1 if enumeration completed, 0 otherwise.
*/
int sm_enum(const StrMap *map, sm_enum_func enum_func, const void *obj);
#ifdef __cplusplus
}
#endif
#endif
strmap.c文件
[java] view plain
copy
print ?
- #include "strmap.h"
-
- typedef struct Pair Pair;
-
- typedef struct Bucket Bucket;
-
- struct Pair {
- char *key;
- char *value;
- };
-
- struct Bucket {
- unsigned int count;
- Pair *pairs;
- };
-
- struct StrMap {
- unsigned int count;
- Bucket *buckets;
- };
-
- static Pair * get_pair(Bucket *bucket, const char *key);
- static unsigned long hash(const char *str);
-
- StrMap * sm_new(unsigned int capacity)
- {
- StrMap *map;
-
- map = malloc(sizeof(StrMap));
- if (map == NULL) {
- return NULL;
- }
- map->count = capacity;
- map->buckets = malloc(map->count * sizeof(Bucket));
- if (map->buckets == NULL) {
- free(map);
- return NULL;
- }
- memset(map->buckets, 0, map->count * sizeof(Bucket));
- return map;
- }
-
- void sm_delete(StrMap *map)
- {
- unsigned int i, j, n, m;
- Bucket *bucket;
- Pair *pair;
-
- if (map == NULL) {
- return;
- }
- n = map->count;
- bucket = map->buckets;
- i = 0;
- while (i < n) {
- m = bucket->count;
- pair = bucket->pairs;
- j = 0;
- while(j < m) {
- free(pair->key);
- free(pair->value);
- pair++;
- j++;
- }
- free(bucket->pairs);
- bucket++;
- i++;
- }
- free(map->buckets);
- free(map);
- }
-
- int sm_get(const StrMap *map, const char *key, char *out_buf, unsigned int n_out_buf)
- {
- unsigned int index;
- Bucket *bucket;
- Pair *pair;
-
- if (map == NULL) {
- return 0;
- }
- if (key == NULL) {
- return 0;
- }
- index = hash(key) % map->count;
- bucket = &(map->buckets[index]);
- pair = get_pair(bucket, key);
- if (pair == NULL) {
- return 0;
- }
- if (out_buf == NULL && n_out_buf == 0) {
- return strlen(pair->value) + 1;
- }
- if (out_buf == NULL) {
- return 0;
- }
- if (strlen(pair->value) >= n_out_buf) {
- return 0;
- }
- strcpy(out_buf, pair->value);
- return 1;
- }
-
- int sm_exists(const StrMap *map, const char *key)
- {
- unsigned int index;
- Bucket *bucket;
- Pair *pair;
-
- if (map == NULL) {
- return 0;
- }
- if (key == NULL) {
- return 0;
- }
- index = hash(key) % map->count;
- bucket = &(map->buckets[index]);
- pair = get_pair(bucket, key);
- if (pair == NULL) {
- return 0;
- }
- return 1;
- }
-
- int sm_put(StrMap *map, const char *key, const char *value)
- {
- unsigned int key_len, value_len, index;
- Bucket *bucket;
- Pair *tmp_pairs, *pair;
- char *tmp_value;
- char *new_key, *new_value;
-
- if (map == NULL) {
- return 0;
- }
- if (key == NULL || value == NULL) {
- return 0;
- }
- key_len = strlen(key);
- value_len = strlen(value);
-
- index = hash(key) % map->count;
- bucket = &(map->buckets[index]);
-
-
-
- if ((pair = get_pair(bucket, key)) != NULL) {
-
-
-
- if (strlen(pair->value) < value_len) {
-
-
-
- tmp_value = realloc(pair->value, (value_len + 1) * sizeof(char));
- if (tmp_value == NULL) {
- return 0;
- }
- pair->value = tmp_value;
- }
-
- strcpy(pair->value, value);
- return 1;
- }
-
- new_key = malloc((key_len + 1) * sizeof(char));
- if (new_key == NULL) {
- return 0;
- }
- new_value = malloc((value_len + 1) * sizeof(char));
- if (new_value == NULL) {
- free(new_key);
- return 0;
- }
-
- if (bucket->count == 0) {
-
-
-
- bucket->pairs = malloc(sizeof(Pair));
- if (bucket->pairs == NULL) {
- free(new_key);
- free(new_value);
- return 0;
- }
- bucket->count = 1;
- }
- else {
-
-
-
- tmp_pairs = realloc(bucket->pairs, (bucket->count + 1) * sizeof(Pair));
- if (tmp_pairs == NULL) {
- free(new_key);
- free(new_value);
- return 0;
- }
- bucket->pairs = tmp_pairs;
- bucket->count++;
- }
-
- pair = &(bucket->pairs[bucket->count - 1]);
- pair->key = new_key;
- pair->value = new_value;
-
- strcpy(pair->key, key);
- strcpy(pair->value, value);
- return 1;
- }
-
- int sm_get_count(const StrMap *map)
- {
- unsigned int i, j, n, m;
- unsigned int count;
- Bucket *bucket;
- Pair *pair;
-
- if (map == NULL) {
- return 0;
- }
- bucket = map->buckets;
- n = map->count;
- i = 0;
- count = 0;
- while (i < n) {
- pair = bucket->pairs;
- m = bucket->count;
- j = 0;
- while (j < m) {
- count++;
- pair++;
- j++;
- }
- bucket++;
- i++;
- }
- return count;
- }
-
- int sm_enum(const StrMap *map, sm_enum_func enum_func, const void *obj)
- {
- unsigned int i, j, n, m;
- Bucket *bucket;
- Pair *pair;
-
- if (map == NULL) {
- return 0;
- }
- if (enum_func == NULL) {
- return 0;
- }
- bucket = map->buckets;
- n = map->count;
- i = 0;
- while (i < n) {
- pair = bucket->pairs;
- m = bucket->count;
- j = 0;
- while (j < m) {
- enum_func(pair->key, pair->value, obj);
- pair++;
- j++;
- }
- bucket++;
- i++;
- }
- return 1;
- }
-
-
-
-
-
- static Pair * get_pair(Bucket *bucket, const char *key)
- {
- unsigned int i, n;
- Pair *pair;
-
- n = bucket->count;
- if (n == 0) {
- return NULL;
- }
- pair = bucket->pairs;
- i = 0;
- while (i < n) {
- if (pair->key != NULL && pair->value != NULL) {
- if (strcmp(pair->key, key) == 0) {
- return pair;
- }
- }
- pair++;
- i++;
- }
- return NULL;
- }
-
-
-
-
- static unsigned long hash(const char *str)
- {
- unsigned long hash = 5381;
- int c;
-
- while (c = *str++) {
- hash = ((hash << 5) + hash) + c;
- }
- return hash;
- }
#include "strmap.h"
typedef struct Pair Pair;
typedef struct Bucket Bucket;
struct Pair {
char *key;
char *value;
};
struct Bucket {
unsigned int count;
Pair *pairs;
};
struct StrMap {
unsigned int count;
Bucket *buckets;
};
static Pair * get_pair(Bucket *bucket, const char *key);
static unsigned long hash(const char *str);
StrMap * sm_new(unsigned int capacity)
{
StrMap *map;
map = malloc(sizeof(StrMap));
if (map == NULL) {
return NULL;
}
map->count = capacity;
map->buckets = malloc(map->count * sizeof(Bucket));
if (map->buckets == NULL) {
free(map);
return NULL;
}
memset(map->buckets, 0, map->count * sizeof(Bucket));
return map;
}
void sm_delete(StrMap *map)
{
unsigned int i, j, n, m;
Bucket *bucket;
Pair *pair;
if (map == NULL) {
return;
}
n = map->count;
bucket = map->buckets;
i = 0;
while (i < n) {
m = bucket->count;
pair = bucket->pairs;
j = 0;
while(j < m) {
free(pair->key);
free(pair->value);
pair++;
j++;
}
free(bucket->pairs);
bucket++;
i++;
}
free(map->buckets);
free(map);
}
int sm_get(const StrMap *map, const char *key, char *out_buf, unsigned int n_out_buf)
{
unsigned int index;
Bucket *bucket;
Pair *pair;
if (map == NULL) {
return 0;
}
if (key == NULL) {
return 0;
}
index = hash(key) % map->count;
bucket = &(map->buckets[index]);
pair = get_pair(bucket, key);
if (pair == NULL) {
return 0;
}
if (out_buf == NULL && n_out_buf == 0) {
return strlen(pair->value) + 1;
}
if (out_buf == NULL) {
return 0;
}
if (strlen(pair->value) >= n_out_buf) {
return 0;
}
strcpy(out_buf, pair->value);
return 1;
}
int sm_exists(const StrMap *map, const char *key)
{
unsigned int index;
Bucket *bucket;
Pair *pair;
if (map == NULL) {
return 0;
}
if (key == NULL) {
return 0;
}
index = hash(key) % map->count;
bucket = &(map->buckets[index]);
pair = get_pair(bucket, key);
if (pair == NULL) {
return 0;
}
return 1;
}
int sm_put(StrMap *map, const char *key, const char *value)
{
unsigned int key_len, value_len, index;
Bucket *bucket;
Pair *tmp_pairs, *pair;
char *tmp_value;
char *new_key, *new_value;
if (map == NULL) {
return 0;
}
if (key == NULL || value == NULL) {
return 0;
}
key_len = strlen(key);
value_len = strlen(value);
/* Get a pointer to the bucket the key string hashes to */
index = hash(key) % map->count;
bucket = &(map->buckets[index]);
/* Check if we can handle insertion by simply replacing
* an existing value in a key-value pair in the bucket.
*/
if ((pair = get_pair(bucket, key)) != NULL) {
/* The bucket contains a pair that matches the provided key,
* change the value for that pair to the new value.
*/
if (strlen(pair->value) < value_len) {
/* If the new value is larger than the old value, re-allocate
* space for the new larger value.
*/
tmp_value = realloc(pair->value, (value_len + 1) * sizeof(char));
if (tmp_value == NULL) {
return 0;
}
pair->value = tmp_value;
}
/* Copy the new value into the pair that matches the key */
strcpy(pair->value, value);
return 1;
}
/* Allocate space for a new key and value */
new_key = malloc((key_len + 1) * sizeof(char));
if (new_key == NULL) {
return 0;
}
new_value = malloc((value_len + 1) * sizeof(char));
if (new_value == NULL) {
free(new_key);
return 0;
}
/* Create a key-value pair */
if (bucket->count == 0) {
/* The bucket is empty, lazily allocate space for a single
* key-value pair.
*/
bucket->pairs = malloc(sizeof(Pair));
if (bucket->pairs == NULL) {
free(new_key);
free(new_value);
return 0;
}
bucket->count = 1;
}
else {
/* The bucket wasn't empty but no pair existed that matches the provided
* key, so create a new key-value pair.
*/
tmp_pairs = realloc(bucket->pairs, (bucket->count + 1) * sizeof(Pair));
if (tmp_pairs == NULL) {
free(new_key);
free(new_value);
return 0;
}
bucket->pairs = tmp_pairs;
bucket->count++;
}
/* Get the last pair in the chain for the bucket */
pair = &(bucket->pairs[bucket->count - 1]);
pair->key = new_key;
pair->value = new_value;
/* Copy the key and its value into the key-value pair */
strcpy(pair->key, key);
strcpy(pair->value, value);
return 1;
}
int sm_get_count(const StrMap *map)
{
unsigned int i, j, n, m;
unsigned int count;
Bucket *bucket;
Pair *pair;
if (map == NULL) {
return 0;
}
bucket = map->buckets;
n = map->count;
i = 0;
count = 0;
while (i < n) {
pair = bucket->pairs;
m = bucket->count;
j = 0;
while (j < m) {
count++;
pair++;
j++;
}
bucket++;
i++;
}
return count;
}
int sm_enum(const StrMap *map, sm_enum_func enum_func, const void *obj)
{
unsigned int i, j, n, m;
Bucket *bucket;
Pair *pair;
if (map == NULL) {
return 0;
}
if (enum_func == NULL) {
return 0;
}
bucket = map->buckets;
n = map->count;
i = 0;
while (i < n) {
pair = bucket->pairs;
m = bucket->count;
j = 0;
while (j < m) {
enum_func(pair->key, pair->value, obj);
pair++;
j++;
}
bucket++;
i++;
}
return 1;
}
/*
* Returns a pair from the bucket that matches the provided key,
* or null if no such pair exist.
*/
static Pair * get_pair(Bucket *bucket, const char *key)
{
unsigned int i, n;
Pair *pair;
n = bucket->count;
if (n == 0) {
return NULL;
}
pair = bucket->pairs;
i = 0;
while (i < n) {
if (pair->key != NULL && pair->value != NULL) {
if (strcmp(pair->key, key) == 0) {
return pair;
}
}
pair++;
i++;
}
return NULL;
}
/*
* Returns a hash code for the provided string.
*/
static unsigned long hash(const char *str)
{
unsigned long hash = 5381;
int c;
while (c = *str++) {
hash = ((hash << 5) + hash) + c;
}
return hash;
}
前一节与这节这两种实现方法看似比较类似,但也有差异:
基于哈希桶的情况下,由于Hash桶容量的限制,所以,有可能发生Hash表填不满的情况,也就是,虽然Hash表里面还有空位,但是新建的表项由于冲突过多,而不能装入Hash表中。不过,这样的实现也有其好处,就是查表的最大开销是可以确定的,因为最多处理的冲突数是确定的,所以算法的时间复杂度为O(1)+O(m),其中m为Hash桶容量。
而另一种通过链表的实现,由于Hash桶的容量是无限的,因此,只要没有超出Hash表的最大容量,就能够容纳新建的表项。但是,一旦发生了Hash冲突严重的情况,就会造成Hash桶的链表过长,大大降低查找效率。在最坏的情况下,时间复杂度退化为O(n),其中n为Hash表的总容量。当然,这种情况的概率小之又小,几乎是可以忽略的。