Python数据结构-哈希表（Hash Table）

一、哈希表

哈希表（Hash Table）：通过键 key 和一个映射函数 Hash(key) 计算出对应的值 value，把关键码值映射到表中一个位置来访问记录，以加快查找的速度。
哈希函数（Hash Function）：将哈希表中元素的关键键值映射为元素存储位置的函数。
哈希冲突（Hash Collision）：不同的关键字通过同一个哈希函数可能得到同一哈希地址。
哈希表的两个核心问题是：「哈希函数的构建」 和 「哈希冲突的解决方法」。

常用的哈希函数方法有：直接定址法、除留余数法、平方取中法、基数转换法、数字分析法、折叠法、随机数法、乘积法、点积法等。
常用的哈希冲突的解决方法有两种：开放地址法和链地址法。

705. 设计哈希集合

输入：
["MyHashSet", "add", "add", "contains", "contains", "add", "contains", "remove", "contains"]
[[], [1], [2], [1], [3], [2], [2], [2], [2]]
输出：
[null, null, null, true, false, null, true, null, false]
解释：
MyHashSet myHashSet = new MyHashSet();
myHashSet.add(1); // set = [1]
myHashSet.add(2); // set = [1, 2]
myHashSet.contains(1); // 返回 True
myHashSet.contains(3); // 返回 False ，（未找到）
myHashSet.add(2); // set = [1, 2]
myHashSet.contains(2); // 返回 True
myHashSet.remove(2); // set = [1]
myHashSet.contains(2); // 返回 False ，（已移除）

# 定义一个一维长度为 buckets 的二维数组 table。第一维度用于计算哈希函数，为 key 分桶。
# 第二个维度用于寻找 key 存放的具体位置。第二维度的数组会根据 key 值动态增长，模拟真正的链表。

class MyHashSet:
    def __init__(self):
        self.buckets = 1003
        self.table = [[] for _ in range(self.buckets)]

    def hash(self, key):
        return key % self.buckets        # 用取余数的方法实现集合

    def add(self, key: int) -> None:
        hash_key = self.hash(key)
        if key in self.table[hash_key]:
            return 
        self.table[hash_key].append(key)

    def remove(self, key: int) -> None:
        hash_key = self.hash(key)
        if key not in self.table[hash_key]:
            return
        self.table[hash_key].remove(key)

    def contains(self, key: int) -> bool:
        hash_key = self.hash(key)
        return key in self.table[hash_key]

706. 设计哈希映射

输入：
["MyHashMap", "put", "put", "get", "get", "put", "get", "remove", "get"]
[[], [1, 1], [2, 2], [1], [3], [2, 1], [2], [2], [2]]
输出：
[null, null, null, 1, -1, null, 1, null, -1]
解释：
MyHashMap myHashMap = new MyHashMap();
myHashMap.put(1, 1); // myHashMap 现在为 [[1,1]]
myHashMap.put(2, 2); // myHashMap 现在为 [[1,1], [2,2]]
myHashMap.get(1); // 返回 1 ，myHashMap 现在为 [[1,1], [2,2]]
myHashMap.get(3); // 返回 -1（未找到），myHashMap 现在为 [[1,1], [2,2]]
myHashMap.put(2, 1); // myHashMap 现在为 [[1,1], [2,1]]（更新已有的值）
myHashMap.get(2); // 返回 1 ，myHashMap 现在为 [[1,1], [2,1]]
myHashMap.remove(2); // 删除键为 2 的数据，myHashMap 现在为 [[1,1]]
myHashMap.get(2); // 返回 -1（未找到），myHashMap 现在为 [[1,1]]

# 设计一个二维数组
class MyHashMap:
    def __init__(self):
        self.buckets = 1003
        self.table = [[] for _ in range(self.buckets)]

    # 哈希映射关系
    def hash(self, key):
        return key % self.buckets

    def put(self, key: int, value: int) -> None:
        hash_key = self.hash(key)
        for item in self.table[hash_key]:
            if item[0] == key:
                item[1] = value 
                return 
        self.table[hash_key].append([key, value])
            
    def get(self, key: int) -> int:
        hash_key = self.hash(key)
        for item in self.table[hash_key]:
            if item[0] == key:
                return item[-1]
        return -1

    def remove(self, key: int) -> None:
        hash_key = self.hash(key)
        for i, item in enumerate(self.table[hash_key]):
            if item[0] == key:
                self.table[hash_key].pop(i)
                return

217. 存在重复元素

输入：nums = [1,2,3,1]
输出：true

class Solution:
    def containsDuplicate(self, nums: List[int]) -> bool:
        hashMap = {}
        for num in nums:
            if num in hashMap:
                hashMap[num] += 1
            else:
                hashMap[num] = 1

        for index in hashMap:
            if hashMap[index] >= 2:
                return True
        return False

class Solution:
    def containsDuplicate(self, nums: List[int]) -> bool:
        hashMap = set()
        for num in nums:
            if num in hashMap:
                return True
            else:
                hashMap.add(num)
        return False

219. 存在重复元素 II

给你一个整数数组 nums 和一个整数 k ，判断数组中是否存在两个不同的索引 i 和 j ，满足 nums[i] == nums[j] 且 abs(i - j) <= k 。如果存在，返回 true ；否则，返回 false 。

# 找到重复元素和其索引
class Solution:
    def containsNearbyDuplicate(self, nums: List[int], k: int) -> bool:
        hashMap = {}
        index = []
        for i in range(len(nums)):
            # 已经存在重复的情况
            if nums[i] in hashMap and abs(i - hashMap[nums[i]]) <= k:
                return True
            else:
                hashMap[nums[i]] = i
        return False

# 维护一个长度为 k 的集合
class Solution:
    def containsNearbyDuplicate(self, nums: List[int], k: int) -> bool:
        nums_set = set()
        for i in range(len(nums)):
            # 存在重复元素
            if nums[i] in nums_set:
                return True
            nums_set.add(nums[i])
            # 及时删除超出数组长度的元素
            if len(nums_set) > k:
                nums_set.remove(nums[i - k])
        return False

220. 存在重复元素 III

给你一个整数数组 nums 和两个整数 k 和 t 。请你判断是否存在两个不同下标 i 和 j，使得 abs(nums[i] - nums[j]) <= t ，同时又满足 abs(i - j) <= k 。

如果存在则返回 true，不存在返回 false。

输入：nums = [1,2,3,1], k = 3, t = 0
输出：true

class Solution:
    def containsNearbyAlmostDuplicate(self, nums: List[int], k: int, t: int) -> bool:
        bucket_dict = dict()
        for i in range(len(nums)):
            # 将nums[i]划分到大小为 t + 1 的不同桶中，分桶操作
            num = nums[i] // (t + 1)
            # print(num)

            # 如果桶中已经有元素，有相同的分桶结果，表示存在相同元素
            if num in bucket_dict:
                return True
            # 将 nums[i] 放入桶中
            bucket_dict[num] = nums[i]
            # print(bucket_dict)

            # 判断左侧桶是否满足条件
            if (num - 1) in bucket_dict and abs(bucket_dict[num - 1] - nums[i]) <= t:
                return True
            # 判断右侧桶是否满足条件
            if (num + 1) in bucket_dict and abs(bucket_dict[num + 1] - nums[i]) <= t:
                return True
            # 将 i - k 之前的旧桶清除，因为之前的桶已经不满足条件了
            if i >= k:
                bucket_dict.pop(nums[i-k] // (t + 1))  
        return False

349. 两个数组的交集

给定两个数组 nums1 和 nums2 ，返回它们的交集。输出结果中的每个元素一定是唯一的。我们可以不考虑输出结果的顺序。

class Solution:
    def intersection(self, nums1: List[int], nums2: List[int]) -> List[int]:
        l1 = set(nums1)
        l2 = set(nums2)
        return list(l1 & l2)

class Solution:
    def intersection(self, nums1: List[int], nums2: List[int]) -> List[int]:
        hashmap = dict()
        result = []
        for num1 in nums1:
            if num1 not in hashmap:
                hashmap[num1] = 1         # 只做一次计数
        for num2 in nums2:
            if num2 in hashmap and hashmap[num2] != 0:
                hashmap[num2] -= 1        # 及时对结果进行处理  
                result.append(num2)
        return result

350. 两个数组的交集 II

给你两个整数数组 nums1 和 nums2 ，请你以数组形式返回两数组的交集。返回结果中每个元素出现的次数，应与元素在两个数组中都出现的次数一致（如果出现次数不一致，则考虑取较小值）。可以不考虑输出结果的顺序。

class Solution:
    def intersect(self, nums1: List[int], nums2: List[int]) -> List[int]:
        hashmap = {}
        result = []
        for num1 in nums1:
            if num1 in hashmap:
                hashmap[num1] += 1
            else:
                hashmap[num1] = 1

        for num2 in nums2:
            if num2 in hashmap and hashmap[num2] != 0:
                hashmap[num2] -= 1
                result.append(num2)
        return result

36. 有效的数独

请你判断一个 9 x 9 的数独是否有效。只需要根据以下规则，验证已经填入的数字是否有效即可。
数字 1-9 在每一行只能出现一次。
数字 1-9 在每一列只能出现一次。
数字 1-9 在每一个以粗实线分隔的 3x3 宫内只能出现一次。（请参考示例图）

class Solution:
    def isValidSudoku(self, board: List[List[str]]) -> bool:
        # 记录行数据、列数据、3x3格子数据，用于标记 1-9 共10个数字
        rows = [[0 for _ in range(10)] for _ in range(10)]
        columns = [[0 for _ in range(10)] for _ in range(10)]
        boxes = [[0 for _ in range(10)] for _ in range(10)]

        for i in range(9):
            for j in range(9):
                if board[i][j] != '.':
                    # 1-9数字是否重复出现
                    num = int(board[i][j])
                    board_index = (i // 3) * 3 + j // 3  # 方格角标的计算用 box[(i/3)*3+(j/3)][n] 来表示
                    if rows[i][num] > 0 or columns[j][num] > 0 or boxes[board_index][num] > 0:
                        return False
                    rows[i][num] = 1
                    columns[j][num] = 1
                    boxes[board_index][num] = 1
        return True

from typing import Hashable

class Test:
    def test(self):
        # create hashtable by array
        hashTable = ['']*4
        # create hashtable by dictionary
        mapping = {}

        # add element
        # time complexity:O(1)
        hashTable[1] = 'a'
        hashTable[2] = 'b'
        hashTable[3] = 'c'
        mapping[1] = 'a'
        mapping[2] = 'b'
        mapping[3] = 'c'

        # update element
        # time complexity: O(1)
        hashTable[1] = 'aa'
        mapping[1] = 'aa'

        # remove element
        # time complexity: O(1)
        hashTable[1] = ''
        mapping.pop(1)

        # get value
        # time complexity:O(1)
        hashTable[3]
        mapping[3]

        # check value
        # time complexity: O(1)
        3 in mapping

        # length
        # time complexity: O(1)
        len(mapping)

        # is empty
        # time complexity: O(1)
        len(mapping) == 0

if __name__ == '__main__':
    test = Test()
    test.test()

力扣217
力扣389
力扣496

内容参考：https://algo.itcharge.cn/05.%E5%93%88%E5%B8%8C%E8%A1%A8/01.%E5%93%88%E5%B8%8C%E8%A1%A8%E7%9F%A5%E8%AF%86/