数据结构-C语言版本(七)哈希集合和哈希表

数据结构中的哈希集合与哈希表:概念、操作与实战

第一部分 哈希集合和哈希表的分类及常见形式

哈希表(Hash Table)和哈希集合(Hash Set)是基于哈希函数实现的高效数据结构,平均情况下可以提供O(1)时间复杂度的查找、插入和删除操作。

1. 哈希表(散列表)

键值对存储结构,通过键快速访问值

#define TABLE_SIZE 1000

typedef struct HashNode {
    int key;
    int value;
    struct HashNode* next;
} HashNode;

typedef struct {
    HashNode** buckets;
    int size;
} HashMap;

2. 哈希集合

只存储唯一键的结构,不存储值

typedef struct SetNode {
    int key;
    struct SetNode* next;
} SetNode;

typedef struct {
    SetNode** buckets;
    int size;
} HashSet;

3. 常见哈希冲突解决方法

链地址法(Separate Chaining)
// 如上HashMap和HashSet的实现就是使用链地址法
开放寻址法(Open Addressing)
typedef struct {
    int* keys;
    int* values;
    int size;
    int capacity;
} OpenAddressingHashMap;

4. 动态扩容哈希表

当负载因子超过阈值时自动扩容

typedef struct {
    HashNode** buckets;
    int size;
    int capacity;
    float loadFactor;
} DynamicHashMap;

第二部分 哈希集合和哈希表常见操作

1. 哈希表基本操作

初始化哈希表
HashMap* createHashMap() {
    HashMap* map = (HashMap*)malloc(sizeof(HashMap));
    map->buckets = (HashNode**)calloc(TABLE_SIZE, sizeof(HashNode*));
    map->size = 0;
    return map;
}
哈希函数
int hashFunction(int key) {
    return key % TABLE_SIZE;
}
插入键值对
void put(HashMap* map, int key, int value) {
    int index = hashFunction(key);
    HashNode* node = map->buckets[index];
    
    // 检查是否已存在
    while(node != NULL) {
        if(node->key == key) {
            node->value = value; // 更新值
            return;
        }
        node = node->next;
    }
    
    // 创建新节点
    HashNode* newNode = (HashNode*)malloc(sizeof(HashNode));
    newNode->key = key;
    newNode->value = value;
    newNode->next = map->buckets[index];
    map->buckets[index] = newNode;
    map->size++;
}
获取值
int get(HashMap* map, int key) {
    int index = hashFunction(key);
    HashNode* node = map->buckets[index];
    
    while(node != NULL) {
        if(node->key == key) {
            return node->value;
        }
        node = node->next;
    }
    
    return -1; // 未找到
}
删除键值对
void removeKey(HashMap* map, int key) {
    int index = hashFunction(key);
    HashNode* node = map->buckets[index];
    HashNode* prev = NULL;
    
    while(node != NULL) {
        if(node->key == key) {
            if(prev == NULL) {
                map->buckets[index] = node->next;
            } else {
                prev->next = node->next;
            }
            free(node);
            map->size--;
            return;
        }
        prev = node;
        node = node->next;
    }
}

2. 哈希集合基本操作

初始化哈希集合
HashSet* createHashSet() {
    HashSet* set = (HashSet*)malloc(sizeof(HashSet));
    set->buckets = (SetNode**)calloc(TABLE_SIZE, sizeof(SetNode*));
    set->size = 0;
    return set;
}
添加元素
void add(HashSet* set, int key) {
    int index = hashFunction(key);
    SetNode* node = set->buckets[index];
    
    // 检查是否已存在
    while(node != NULL) {
        if(node->key == key) {
            return; // 已存在
        }
        node = node->next;
    }
    
    // 创建新节点
    SetNode* newNode = (SetNode*)malloc(sizeof(SetNode));
    newNode->key = key;
    newNode->next = set->buckets[index];
    set->buckets[index] = newNode;
    set->size++;
}
检查元素是否存在
int contains(HashSet* set, int key) {
    int index = hashFunction(key);
    SetNode* node = set->buckets[index];
    
    while(node != NULL) {
        if(node->key == key) {
            return 1; // 存在
        }
        node = node->next;
    }
    
    return 0; // 不存在
}
删除元素
void removeFromSet(HashSet* set, int key) {
    int index = hashFunction(key);
    SetNode* node = set->buckets[index];
    SetNode* prev = NULL;
    
    while(node != NULL) {
        if(node->key == key) {
            if(prev == NULL) {
                set->buckets[index] = node->next;
            } else {
                prev->next = node->next;
            }
            free(node);
            set->size--;
            return;
        }
        prev = node;
        node = node->next;
    }
}

3. 动态扩容实现

void resizeHashMap(DynamicHashMap* map) {
    int newCapacity = map->capacity * 2;
    HashNode** newBuckets = (HashNode**)calloc(newCapacity, sizeof(HashNode*));
    
    // 重新哈希所有元素
    for(int i = 0; i < map->capacity; i++) {
        HashNode* node = map->buckets[i];
        while(node != NULL) {
            HashNode* next = node->next;
            int newIndex = node->key % newCapacity;
            
            node->next = newBuckets[newIndex];
            newBuckets[newIndex] = node;
            
            node = next;
        }
    }
    
    free(map->buckets);
    map->buckets = newBuckets;
    map->capacity = newCapacity;
}

void dynamicPut(DynamicHashMap* map, int key, int value) {
    // 检查是否需要扩容
    if((float)map->size / map->capacity > map->loadFactor) {
        resizeHashMap(map);
    }
    
    // 插入逻辑与普通哈希表相同
    // ...
}

第三部分 哈希集合和哈希表编程题例子

1. 两数之和

int* twoSum(int* nums, int numsSize, int target, int* returnSize) {
    HashMap* map = createHashMap();
    int* result = (int*)malloc(2 * sizeof(int));
    *returnSize = 2;
    
    for(int i = 0; i < numsSize; i++) {
        int complement = target - nums[i];
        int complementIndex = get(map, complement);
        
        if(complementIndex != -1) {
            result[0] = complementIndex;
            result[1] = i;
            return result;
        }
        
        put(map, nums[i], i);
    }
    
    return NULL;
}

2. 无重复字符的最长子串

int lengthOfLongestSubstring(char* s) {
    int charIndex[128]; // ASCII字符集
    memset(charIndex, -1, sizeof(charIndex));
    
    int maxLength = 0;
    int start = 0;
    
    for(int end = 0; s[end] != '\0'; end++) {
        if(charIndex[s[end]] >= start) {
            start = charIndex[s[end]] + 1;
        }
        charIndex[s[end]] = end;
        maxLength = fmax(maxLength, end - start + 1);
    }
    
    return maxLength;
}

3. 字母异位词分组

#define PRIME_NUM 31

long hashString(char* str) {
    long hash = 0;
    for(int i = 0; str[i] != '\0'; i++) {
        hash = PRIME_NUM * hash + str[i];
    }
    return hash;
}

char*** groupAnagrams(char** strs, int strsSize, int* returnSize, int** returnColumnSizes) {
    HashMap* map = createHashMap();
    int groupCount = 0;
    
    for(int i = 0; i < strsSize; i++) {
        // 复制字符串并排序作为键
        char* key = strdup(strs[i]);
        int len = strlen(key);
        qsort(key, len, sizeof(char), compareChar);
        
        long hash = hashString(key);
        int index = get(map, hash);
        
        if(index == -1) {
            put(map, hash, groupCount);
            index = groupCount++;
        }
        
        free(key);
    }
    
    // 分配结果内存
    // ... 具体实现略 ...
    
    return result;
}

4. 存在重复元素

bool containsDuplicate(int* nums, int numsSize) {
    HashSet* set = createHashSet();
    
    for(int i = 0; i < numsSize; i++) {
        if(contains(set, nums[i])) {
            return true;
        }
        add(set, nums[i]);
    }
    
    return false;
}

5. 最长连续序列

int longestConsecutive(int* nums, int numsSize) {
    HashSet* set = createHashSet();
    
    // 将所有数字加入集合
    for(int i = 0; i < numsSize; i++) {
        add(set, nums[i]);
    }
    
    int maxLength = 0;
    
    for(int i = 0; i < numsSize; i++) {
        // 只有当当前数字是序列起点时才检查
        if(!contains(set, nums[i] - 1)) {
            int currentNum = nums[i];
            int currentLength = 1;
            
            while(contains(set, currentNum + 1)) {
                currentNum++;
                currentLength++;
            }
            
            maxLength = fmax(maxLength, currentLength);
        }
    }
    
    return maxLength;
}

6. 设计LRU缓存

typedef struct LRUNode {
    int key;
    int value;
    struct LRUNode* prev;
    struct LRUNode* next;
} LRUNode;

typedef struct {
    int capacity;
    int size;
    LRUNode* head;
    LRUNode* tail;
    LRUNode** hashMap; // 简化的哈希表,实际应使用更高效的实现
} LRUCache;

LRUCache* lRUCacheCreate(int capacity) {
    LRUCache* cache = (LRUCache*)malloc(sizeof(LRUCache));
    cache->capacity = capacity;
    cache->size = 0;
    cache->head = NULL;
    cache->tail = NULL;
    cache->hashMap = (LRUNode**)calloc(10000, sizeof(LRUNode*)); // 假设key范围在0-9999
    return cache;
}

void moveToHead(LRUCache* cache, LRUNode* node) {
    if(node == cache->head) return;
    
    // 从链表中移除节点
    if(node->prev) node->prev->next = node->next;
    if(node->next) node->next->prev = node->prev;
    
    // 如果是尾节点,更新尾指针
    if(node == cache->tail) {
        cache->tail = node->prev;
    }
    
    // 将节点移到头部
    node->prev = NULL;
    node->next = cache->head;
    if(cache->head) cache->head->prev = node;
    cache->head = node;
    
    // 如果链表为空,更新尾指针
    if(cache->tail == NULL) {
        cache->tail = node;
    }
}

int lRUCacheGet(LRUCache* obj, int key) {
    LRUNode* node = obj->hashMap[key];
    if(node == NULL) return -1;
    
    moveToHead(obj, node);
    return node->value;
}

void lRUCachePut(LRUCache* obj, int key, int value) {
    LRUNode* node = obj->hashMap[key];
    
    if(node != NULL) {
        // 更新现有节点的值并移到头部
        node->value = value;
        moveToHead(obj, node);
        return;
    }
    
    // 创建新节点
    node = (LRUNode*)malloc(sizeof(LRUNode));
    node->key = key;
    node->value = value;
    node->prev = NULL;
    node->next = NULL;
    
    // 如果缓存已满,移除最近最少使用的节点
    if(obj->size == obj->capacity) {
        LRUNode* tail = obj->tail;
        obj->hashMap[tail->key] = NULL;
        
        if(obj->head == obj->tail) {
            obj->head = NULL;
            obj->tail = NULL;
        } else {
            obj->tail = tail->prev;
            obj->tail->next = NULL;
        }
        
        free(tail);
        obj->size--;
    }
    
    // 添加新节点到头部
    node->next = obj->head;
    if(obj->head) obj->head->prev = node;
    obj->head = node;
    
    if(obj->tail == NULL) {
        obj->tail = node;
    }
    
    obj->hashMap[key] = node;
    obj->size++;
}

哈希表和哈希集合是计算机科学中最高效的数据结构之一,广泛应用于数据库索引、缓存实现、编译器符号表等领域。通过合理的哈希函数设计和冲突解决方法,可以在大多数情况下实现接近常数时间的操作。理解这些数据结构的实现原理和应用场景,对于解决实际编程问题至关重要。

你可能感兴趣的:(c语言,数据结构,散列表,哈希算法)