哈希表(Hash Table)和哈希集合(Hash Set)是基于哈希函数实现的高效数据结构,平均情况下可以提供O(1)时间复杂度的查找、插入和删除操作。
键值对存储结构,通过键快速访问值
#define TABLE_SIZE 1000
typedef struct HashNode {
int key;
int value;
struct HashNode* next;
} HashNode;
typedef struct {
HashNode** buckets;
int size;
} HashMap;
只存储唯一键的结构,不存储值
typedef struct SetNode {
int key;
struct SetNode* next;
} SetNode;
typedef struct {
SetNode** buckets;
int size;
} HashSet;
// 如上HashMap和HashSet的实现就是使用链地址法
typedef struct {
int* keys;
int* values;
int size;
int capacity;
} OpenAddressingHashMap;
当负载因子超过阈值时自动扩容
typedef struct {
HashNode** buckets;
int size;
int capacity;
float loadFactor;
} DynamicHashMap;
HashMap* createHashMap() {
HashMap* map = (HashMap*)malloc(sizeof(HashMap));
map->buckets = (HashNode**)calloc(TABLE_SIZE, sizeof(HashNode*));
map->size = 0;
return map;
}
int hashFunction(int key) {
return key % TABLE_SIZE;
}
void put(HashMap* map, int key, int value) {
int index = hashFunction(key);
HashNode* node = map->buckets[index];
// 检查是否已存在
while(node != NULL) {
if(node->key == key) {
node->value = value; // 更新值
return;
}
node = node->next;
}
// 创建新节点
HashNode* newNode = (HashNode*)malloc(sizeof(HashNode));
newNode->key = key;
newNode->value = value;
newNode->next = map->buckets[index];
map->buckets[index] = newNode;
map->size++;
}
int get(HashMap* map, int key) {
int index = hashFunction(key);
HashNode* node = map->buckets[index];
while(node != NULL) {
if(node->key == key) {
return node->value;
}
node = node->next;
}
return -1; // 未找到
}
void removeKey(HashMap* map, int key) {
int index = hashFunction(key);
HashNode* node = map->buckets[index];
HashNode* prev = NULL;
while(node != NULL) {
if(node->key == key) {
if(prev == NULL) {
map->buckets[index] = node->next;
} else {
prev->next = node->next;
}
free(node);
map->size--;
return;
}
prev = node;
node = node->next;
}
}
HashSet* createHashSet() {
HashSet* set = (HashSet*)malloc(sizeof(HashSet));
set->buckets = (SetNode**)calloc(TABLE_SIZE, sizeof(SetNode*));
set->size = 0;
return set;
}
void add(HashSet* set, int key) {
int index = hashFunction(key);
SetNode* node = set->buckets[index];
// 检查是否已存在
while(node != NULL) {
if(node->key == key) {
return; // 已存在
}
node = node->next;
}
// 创建新节点
SetNode* newNode = (SetNode*)malloc(sizeof(SetNode));
newNode->key = key;
newNode->next = set->buckets[index];
set->buckets[index] = newNode;
set->size++;
}
int contains(HashSet* set, int key) {
int index = hashFunction(key);
SetNode* node = set->buckets[index];
while(node != NULL) {
if(node->key == key) {
return 1; // 存在
}
node = node->next;
}
return 0; // 不存在
}
void removeFromSet(HashSet* set, int key) {
int index = hashFunction(key);
SetNode* node = set->buckets[index];
SetNode* prev = NULL;
while(node != NULL) {
if(node->key == key) {
if(prev == NULL) {
set->buckets[index] = node->next;
} else {
prev->next = node->next;
}
free(node);
set->size--;
return;
}
prev = node;
node = node->next;
}
}
void resizeHashMap(DynamicHashMap* map) {
int newCapacity = map->capacity * 2;
HashNode** newBuckets = (HashNode**)calloc(newCapacity, sizeof(HashNode*));
// 重新哈希所有元素
for(int i = 0; i < map->capacity; i++) {
HashNode* node = map->buckets[i];
while(node != NULL) {
HashNode* next = node->next;
int newIndex = node->key % newCapacity;
node->next = newBuckets[newIndex];
newBuckets[newIndex] = node;
node = next;
}
}
free(map->buckets);
map->buckets = newBuckets;
map->capacity = newCapacity;
}
void dynamicPut(DynamicHashMap* map, int key, int value) {
// 检查是否需要扩容
if((float)map->size / map->capacity > map->loadFactor) {
resizeHashMap(map);
}
// 插入逻辑与普通哈希表相同
// ...
}
int* twoSum(int* nums, int numsSize, int target, int* returnSize) {
HashMap* map = createHashMap();
int* result = (int*)malloc(2 * sizeof(int));
*returnSize = 2;
for(int i = 0; i < numsSize; i++) {
int complement = target - nums[i];
int complementIndex = get(map, complement);
if(complementIndex != -1) {
result[0] = complementIndex;
result[1] = i;
return result;
}
put(map, nums[i], i);
}
return NULL;
}
int lengthOfLongestSubstring(char* s) {
int charIndex[128]; // ASCII字符集
memset(charIndex, -1, sizeof(charIndex));
int maxLength = 0;
int start = 0;
for(int end = 0; s[end] != '\0'; end++) {
if(charIndex[s[end]] >= start) {
start = charIndex[s[end]] + 1;
}
charIndex[s[end]] = end;
maxLength = fmax(maxLength, end - start + 1);
}
return maxLength;
}
#define PRIME_NUM 31
long hashString(char* str) {
long hash = 0;
for(int i = 0; str[i] != '\0'; i++) {
hash = PRIME_NUM * hash + str[i];
}
return hash;
}
char*** groupAnagrams(char** strs, int strsSize, int* returnSize, int** returnColumnSizes) {
HashMap* map = createHashMap();
int groupCount = 0;
for(int i = 0; i < strsSize; i++) {
// 复制字符串并排序作为键
char* key = strdup(strs[i]);
int len = strlen(key);
qsort(key, len, sizeof(char), compareChar);
long hash = hashString(key);
int index = get(map, hash);
if(index == -1) {
put(map, hash, groupCount);
index = groupCount++;
}
free(key);
}
// 分配结果内存
// ... 具体实现略 ...
return result;
}
bool containsDuplicate(int* nums, int numsSize) {
HashSet* set = createHashSet();
for(int i = 0; i < numsSize; i++) {
if(contains(set, nums[i])) {
return true;
}
add(set, nums[i]);
}
return false;
}
int longestConsecutive(int* nums, int numsSize) {
HashSet* set = createHashSet();
// 将所有数字加入集合
for(int i = 0; i < numsSize; i++) {
add(set, nums[i]);
}
int maxLength = 0;
for(int i = 0; i < numsSize; i++) {
// 只有当当前数字是序列起点时才检查
if(!contains(set, nums[i] - 1)) {
int currentNum = nums[i];
int currentLength = 1;
while(contains(set, currentNum + 1)) {
currentNum++;
currentLength++;
}
maxLength = fmax(maxLength, currentLength);
}
}
return maxLength;
}
typedef struct LRUNode {
int key;
int value;
struct LRUNode* prev;
struct LRUNode* next;
} LRUNode;
typedef struct {
int capacity;
int size;
LRUNode* head;
LRUNode* tail;
LRUNode** hashMap; // 简化的哈希表,实际应使用更高效的实现
} LRUCache;
LRUCache* lRUCacheCreate(int capacity) {
LRUCache* cache = (LRUCache*)malloc(sizeof(LRUCache));
cache->capacity = capacity;
cache->size = 0;
cache->head = NULL;
cache->tail = NULL;
cache->hashMap = (LRUNode**)calloc(10000, sizeof(LRUNode*)); // 假设key范围在0-9999
return cache;
}
void moveToHead(LRUCache* cache, LRUNode* node) {
if(node == cache->head) return;
// 从链表中移除节点
if(node->prev) node->prev->next = node->next;
if(node->next) node->next->prev = node->prev;
// 如果是尾节点,更新尾指针
if(node == cache->tail) {
cache->tail = node->prev;
}
// 将节点移到头部
node->prev = NULL;
node->next = cache->head;
if(cache->head) cache->head->prev = node;
cache->head = node;
// 如果链表为空,更新尾指针
if(cache->tail == NULL) {
cache->tail = node;
}
}
int lRUCacheGet(LRUCache* obj, int key) {
LRUNode* node = obj->hashMap[key];
if(node == NULL) return -1;
moveToHead(obj, node);
return node->value;
}
void lRUCachePut(LRUCache* obj, int key, int value) {
LRUNode* node = obj->hashMap[key];
if(node != NULL) {
// 更新现有节点的值并移到头部
node->value = value;
moveToHead(obj, node);
return;
}
// 创建新节点
node = (LRUNode*)malloc(sizeof(LRUNode));
node->key = key;
node->value = value;
node->prev = NULL;
node->next = NULL;
// 如果缓存已满,移除最近最少使用的节点
if(obj->size == obj->capacity) {
LRUNode* tail = obj->tail;
obj->hashMap[tail->key] = NULL;
if(obj->head == obj->tail) {
obj->head = NULL;
obj->tail = NULL;
} else {
obj->tail = tail->prev;
obj->tail->next = NULL;
}
free(tail);
obj->size--;
}
// 添加新节点到头部
node->next = obj->head;
if(obj->head) obj->head->prev = node;
obj->head = node;
if(obj->tail == NULL) {
obj->tail = node;
}
obj->hashMap[key] = node;
obj->size++;
}
哈希表和哈希集合是计算机科学中最高效的数据结构之一,广泛应用于数据库索引、缓存实现、编译器符号表等领域。通过合理的哈希函数设计和冲突解决方法,可以在大多数情况下实现接近常数时间的操作。理解这些数据结构的实现原理和应用场景,对于解决实际编程问题至关重要。