康熙38bdc

C++ unordered_map与unordered_set的模拟实现

目录

0.前言

1.哈希表（HashTable）设计

1.1设计思想

1.2 HashTable.h

1.3设计思路

2.unordered_map封装

2.1 UnorderedMap.h

2.2代码解释

2.3测试函数

3.unordered_set封装

3.1 UnorderedSet.h

3.2代码解释

3.3测试函数

4.结语

（图像由AI生成）

0.前言

在C++标准库中，unordered_map和unordered_set是两个常用的关联容器，它们分别用于存储键值对和唯一值。它们的底层实现基于哈希表，能够提供高效的插入、查找和删除操作。本文将详细介绍如何使用C++实现unordered_map和unordered_set，并展示其具体实现代码。

1.哈希表（HashTable）设计

哈希表是实现unordered_map和unordered_set的基础。在本设计中，我们使用拉链法（链地址法）解决哈希冲突。拉链法在每个哈希桶中存储一个链表，使得每个桶可以容纳多个映射到同一桶下标（即哈希值对桶的个数取模后结果相同）的元素。

1.1设计思想

哈希函数：我们定义了一个通用的哈希函数模板，并对字符串类型进行了特化处理。哈希函数的作用是将关键码转换为哈希值。
哈希节点：每个哈希节点存储一个元素，并包含指向下一个节点的指针。
哈希表结构：哈希表由一个指针数组和链表组成，每个指针指向一个哈希桶。
迭代器：为了遍历哈希表，我们定义了迭代器，支持哈希表的遍历操作。
基本操作：插入、查找和删除操作通过哈希函数确定元素的存储位置，并在相应的链表中进行操作。

1.2 HashTable.h

#pragma once
#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
using namespace std;

// Default hash functor: converts the key to size_t.
// Only suitable for key types that can be cast to size_t (integers, enums,
// pointers); other key types need a specialization or a user-supplied functor.
template<class K>
struct HashFunc {
    // Returns the key converted to size_t.
    size_t operator()(const K& key) const {
        // C-style cast kept on purpose: it also accepts pointer and scoped-enum
        // keys, which a static_cast would reject.
        return (size_t)key;
    }
};

// Specialization for std::string: polynomial rolling hash with base 31.
template<>
struct HashFunc<std::string> {
    // Returns the hash of `key`; the empty string hashes to 0.
    // Overflow wraps around, which is well defined for the unsigned size_t.
    size_t operator()(const std::string& key) const {
        size_t hash = 0;
        for (const char ch : key) {
            hash = hash * 31 + ch;
        }
        return hash;
    }
};

namespace hash_bucket {
    // Node of the singly linked chain that hangs off one bucket.
    template<class T>
    struct HashNode {
        T _data;            // stored element
        HashNode<T>* _next; // next node in the same bucket, nullptr at the tail

        // Builds a detached node holding a copy of `data`.
        HashNode(const T& data)
            : _data(data), _next(nullptr) {}
    };

    // Forward declaration: the iterator keeps a pointer back to its table.
    template<class K, class T, class KeyOfT, class Hash>
    class HashTable;

    // Forward iterator over every element of a HashTable.
    //   Ref / Ptr  - reference and pointer types handed out by * and ->
    //                (T& / T* for Iterator, const T& / const T* for ConstIterator)
    //   KeyOfT     - functor extracting the key from a stored T
    //   Hash       - functor hashing a key to size_t
    template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
    struct HTIterator {
        typedef HashNode<T> Node;
        typedef HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;

        Node* _node;                               // current node, nullptr means end()
        const HashTable<K, T, KeyOfT, Hash>* _pht; // owning table, used to hop buckets

        HTIterator(Node* node, const HashTable<K, T, KeyOfT, Hash>* pht)
            : _node(node), _pht(pht) {}

        // Access the current element. Must not be called on end().
        Ref operator*() const {
            return _node->_data;
        }

        Ptr operator->() const {
            return &_node->_data;
        }

        // Two iterators are equal when they refer to the same node.
        bool operator!=(const Self& s) const {
            return _node != s._node;
        }

        bool operator==(const Self& s) const {
            return _node == s._node;
        }

        // Pre-increment: advance to the next element, or to end() if none is left.
        Self& operator++() {
            if (_node == nullptr) {
                return *this; // already end(); stay there instead of dereferencing null
            }
            if (_node->_next) {
                // More elements in the current bucket.
                _node = _node->_next;
                return *this;
            }

            // Current bucket exhausted: scan forward for the next non-empty bucket.
            KeyOfT kot;
            Hash hs;
            const size_t bucketCount = _pht->_tables.size();
            size_t hashi = hs(kot(_node->_data)) % bucketCount + 1;
            while (hashi < bucketCount && _pht->_tables[hashi] == nullptr) {
                ++hashi;
            }
            _node = (hashi < bucketCount) ? _pht->_tables[hashi] : nullptr;
            return *this;
        }

        // Post-increment: advance and return the previous position.
        Self operator++(int) {
            Self old(*this);
            ++(*this);
            return old;
        }
    };

    // Separate-chaining hash table shared by unordered_map and unordered_set.
    //   K      - key type
    //   T      - stored element type (the key itself, or a key/value pair)
    //   KeyOfT - functor extracting the key from a T
    //   Hash   - functor hashing a K to size_t
    // Keys are unique; the bucket array doubles once the element count reaches
    // the bucket count (load factor 1).
    template<class K, class T, class KeyOfT, class Hash>
    class HashTable {
        // The iterator reads _tables directly. The parameter names differ from the
        // enclosing ones because a template parameter may not be redeclared.
        template<class K1, class T1, class Ref1, class Ptr1, class KeyOfT1, class Hash1>
        friend struct HTIterator;

        typedef HashNode<T> Node;
    public:
        typedef HTIterator<K, T, T&, T*, KeyOfT, Hash> Iterator;
        typedef HTIterator<K, T, const T&, const T*, KeyOfT, Hash> ConstIterator;

        // Iterator to the first element in bucket order, or End() when empty.
        Iterator Begin() {
            if (_n == 0)
                return End();
            for (size_t i = 0; i < _tables.size(); ++i) {
                if (_tables[i]) {
                    return Iterator(_tables[i], this);
                }
            }
            return End();
        }

        // Past-the-end iterator (null node).
        Iterator End() {
            return Iterator(nullptr, this);
        }

        // Const overload of Begin().
        ConstIterator Begin() const {
            if (_n == 0)
                return End();
            for (size_t i = 0; i < _tables.size(); ++i) {
                if (_tables[i]) {
                    return ConstIterator(_tables[i], this);
                }
            }
            return End();
        }

        // Const overload of End().
        ConstIterator End() const {
            return ConstIterator(nullptr, this);
        }

        // Creates an empty table with 10 buckets.
        HashTable()
            : _tables(10, nullptr) {}

        // Deep copy: the table owns its nodes through raw pointers, so the
        // implicitly generated member-wise copy would free every node twice.
        HashTable(const HashTable& other)
            : _tables(other._tables.size(), nullptr) {
            try {
                for (size_t i = 0; i < other._tables.size(); ++i) {
                    Node** tail = &_tables[i]; // append at the tail to keep chain order
                    for (Node* cur = other._tables[i]; cur; cur = cur->_next) {
                        *tail = new Node(cur->_data);
                        tail = &(*tail)->_next;
                        ++_n;
                    }
                }
            } catch (...) {
                // The destructor does not run for a partially constructed object.
                Clear();
                throw;
            }
        }

        // Copy assignment via copy-and-swap; safe for self-assignment.
        HashTable& operator=(const HashTable& other) {
            if (this != &other) {
                HashTable tmp(other);
                _tables.swap(tmp._tables);
                std::swap(_n, tmp._n);
            }
            return *this;
        }

        // Frees every node.
        ~HashTable() {
            Clear();
        }

        // Inserts `data` if no element with the same key exists.
        // Returns {iterator to the element with that key, true if it was inserted}.
        std::pair<Iterator, bool> Insert(const T& data) {
            KeyOfT kot;
            Iterator it = Find(kot(data));
            if (it != End())
                return std::make_pair(it, false);

            Hash hs;
            if (_n == _tables.size()) {
                // Grow: relink the existing nodes into a table twice as large.
                // Nodes are moved, not reallocated.
                std::vector<Node*> newtables(_tables.size() * 2, nullptr);
                for (size_t i = 0; i < _tables.size(); ++i) {
                    Node* cur = _tables[i];
                    while (cur) {
                        Node* next = cur->_next;
                        const size_t target = hs(kot(cur->_data)) % newtables.size();
                        cur->_next = newtables[target];
                        newtables[target] = cur;
                        cur = next;
                    }
                    _tables[i] = nullptr;
                }
                _tables.swap(newtables);
            }

            // The bucket index must be computed AFTER a possible rehash: an index
            // taken modulo the old size would put the node in the wrong bucket and
            // make it unreachable for Find/Erase.
            const size_t hashi = hs(kot(data)) % _tables.size();

            // Head insertion into the bucket chain.
            Node* newnode = new Node(data);
            newnode->_next = _tables[hashi];
            _tables[hashi] = newnode;
            ++_n;

            return std::make_pair(Iterator(newnode, this), true);
        }

        // Returns an iterator to the element whose key equals `key`, or End().
        Iterator Find(const K& key) {
            KeyOfT kot;
            Hash hs;
            const size_t hashi = hs(key) % _tables.size();
            for (Node* cur = _tables[hashi]; cur; cur = cur->_next) {
                if (kot(cur->_data) == key) {
                    return Iterator(cur, this);
                }
            }
            return End();
        }

        // Removes the element whose key equals `key`.
        // Returns true if an element was removed, false if the key was absent.
        bool Erase(const K& key) {
            KeyOfT kot;
            Hash hs;
            const size_t hashi = hs(key) % _tables.size();
            Node* prev = nullptr;
            for (Node* cur = _tables[hashi]; cur; prev = cur, cur = cur->_next) {
                if (kot(cur->_data) == key) {
                    if (prev == nullptr) {
                        _tables[hashi] = cur->_next; // removing the chain head
                    } else {
                        prev->_next = cur->_next;
                    }
                    delete cur;
                    --_n;
                    return true;
                }
            }
            return false;
        }

    private:
        // Deletes all nodes and leaves every bucket empty.
        void Clear() {
            for (size_t i = 0; i < _tables.size(); ++i) {
                Node* cur = _tables[i];
                while (cur) {
                    Node* next = cur->_next;
                    delete cur;
                    cur = next;
                }
                _tables[i] = nullptr;
            }
            _n = 0;
        }

        std::vector<Node*> _tables; // bucket array; each entry is the head of a chain
        size_t _n = 0;              // number of stored elements
    };
}

1.3设计思路

哈希函数：通用的哈希函数模板适用于可以直接转换为size_t的键类型（如整型），并针对字符串类型提供了特化版本；其他自定义键类型需要由使用者自行提供哈希仿函数。
哈希节点：每个哈希节点存储一个数据元素，并且包含指向下一个节点的指针，用于链表结构。
哈希表类：哈希表类包含一个指针数组，每个指针指向一个哈希桶。哈希表支持插入、查找和删除操作。
迭代器：哈希表迭代器支持对哈希表的遍历操作，通过重载运算符实现。

2.unordered_map封装

unordered_map是一个基于哈希表实现的键值对容器。我们将使用前面设计的HashTable类来实现unordered_map。unordered_map提供了高效的插入、查找和删除操作，适用于需要快速查找的场景。

2.1 UnorderedMap.h

#pragma once
#include "HashTable.h"

namespace wxk {
    // unordered_map类模板
    template>
    class unordered_map {
        // 定义一个提取键的仿函数
        struct MapKeyOfT {
            const K& operator()(const pair& kv) {
                return kv.first;
            }
        };

    public:
        // 定义迭代器类型
        typedef typename hash_bucket::HashTable, MapKeyOfT, Hash>::Iterator iterator;
        typedef typename hash_bucket::HashTable, MapKeyOfT, Hash>::ConstIterator const_iterator;

        // 返回指向容器第一个元素的迭代器
        iterator begin() {
            return _ht.Begin();
        }

        // 返回指向容器末尾的迭代器
        iterator end() {
            return _ht.End();
        }

        // 返回指向容器第一个元素的常量迭代器
        const_iterator begin() const {
            return _ht.Begin();
        }

        // 返回指向容器末尾的常量迭代器
        const_iterator end() const {
            return _ht.End();
        }

        // 插入键值对
        pair insert(const pair& kv) {
            return _ht.Insert(kv);
        }

        // 重载[]操作符，访问或插入元素
        V& operator[](const K& key) {
            pair ret = _ht.Insert(make_pair(key, V()));
            return ret.first->second;
        }

        // 查找元素
        iterator Find(const K& key) {
            return _ht.Find(key);
        }

        // 删除元素
        bool Erase(const K& key) {
            return _ht.Erase(key);
        }

    private:
        // 使用哈希表来存储键值对
        hash_bucket::HashTable, MapKeyOfT, Hash> _ht;
    };

    // 测试unordered_map
     void test_map() {
        unordered_map dict;

        // 插入键值对
        dict.insert({1, "one"});
        dict.insert({2, "two"});
        dict.insert({3, "three"});
        dict.insert({4, "four"});
        dict.insert({5, "five"});

        // 使用[]操作符访问和修改元素
        dict[2] = "TWO";
        dict[6] = "six"; // 插入新的键值对

        // 查找元素
        auto it = dict.Find(3);
        if (it != dict.end()) {
            cout << "Found: " << it->first << " -> " << it->second << endl;
        } else {
            cout << "Not Found: 3" << endl;
        }

        // 删除元素
        bool erased = dict.Erase(4);
        cout << "Element with key 4 " << (erased ? "was erased." : "not found.") << endl;

        // 遍历unordered_map
        cout << "Contents of the unordered_map:" << endl;
        for (auto it = dict.begin(); it != dict.end(); ++it) {
            cout << it->first << " -> " << it->second << endl;
        }
        cout << endl;
    }

}

2.2代码解释

MapKeyOfT仿函数：用于提取键值对中的键，operator() 返回键值对的第一个元素，即键。
类型定义：定义了iterator和const_iterator，分别用于遍历和访问unordered_map中的元素。
begin() 和 end()：返回指向容器第一个元素和末尾的迭代器，支持常量版本。
insert()：插入键值对，使用哈希表的Insert方法。
operator[]：重载[]操作符，用于访问或插入元素。如果键不存在，则插入默认值。
Find()：查找指定键的元素，返回指向该元素的迭代器。
Erase()：删除指定键的元素，返回操作是否成功的布尔值。
私有成员变量：使用哈希表HashTable存储键值对。

2.3测试函数

test_map()：测试unordered_map的插入、删除、访问和遍历功能，展示了如何使用该容器进行基本操作。

输出结果：

Found: 3 -> three
Element with key 4 was erased.
Contents of the unordered_map:
1 -> one
2 -> TWO
3 -> three
5 -> five
6 -> six

3.unordered_set封装

unordered_set是一个基于哈希表实现的唯一值容器。我们将使用前面设计的HashTable类来实现unordered_set。unordered_set提供了高效的插入、查找和删除操作，适用于需要快速查找唯一元素的场景。

3.1 UnorderedSet.h

#pragma once
#include "HashTable.h"

namespace wxk {
    // unordered_set类模板
    template>
    class unordered_set {
        // 定义一个提取键的仿函数
        struct SetKeyOfT {
            const K& operator()(const K& key) {
                return key;
            }
        };

    public:
        // 定义迭代器类型
        typedef typename hash_bucket::HashTable::Iterator iterator;
        typedef typename hash_bucket::HashTable::ConstIterator const_iterator;

        // 返回指向容器第一个元素的迭代器
        iterator begin() {
            return _ht.Begin();
        }

        // 返回指向容器末尾的迭代器
        iterator end() {
            return _ht.End();
        }

        // 返回指向容器第一个元素的常量迭代器
        const_iterator begin() const {
            return _ht.Begin();
        }

        // 返回指向容器末尾的常量迭代器
        const_iterator end() const {
            return _ht.End();
        }

        // 插入元素
        pair insert(const K& key) {
            return _ht.Insert(key);
        }

        // 查找元素
        iterator Find(const K& key) {
            return _ht.Find(key);
        }

        // 删除元素
        bool Erase(const K& key) {
            return _ht.Erase(key);
        }

    private:
        // 使用哈希表来存储唯一元素
        hash_bucket::HashTable _ht;
    };

    // 测试unordered_set
    void test_set() {
        unordered_set s;

        // 插入元素
        int a[] = { 4, 2, 6, 1, 3, 5, 15, 7, 16, 14, 3, 3, 15 };
        for (auto e : a) {
            s.insert(e);
        }

        // 查找元素
        auto it = s.Find(6);
        if (it != s.end()) {
            cout << "Found: " << *it << endl;
        } else {
            cout << "Not Found: 6" << endl;
        }

        // 删除元素
        bool erased = s.Erase(7);
        cout << "Element 7 " << (erased ? "was erased." : "not found.") << endl;

        // 遍历unordered_set
        cout << "Contents of the unordered_set:" << endl;
        for (auto it = s.begin(); it != s.end(); ++it) {
            cout << *it << " ";
        }
        cout << endl;
    }
}

3.2代码解释

SetKeyOfT仿函数：用于提取集合中的元素，operator() 返回元素本身。
类型定义：定义了iterator和const_iterator，分别用于遍历和访问unordered_set中的元素。
begin() 和 end()：返回指向容器第一个元素和末尾的迭代器，支持常量版本。
insert()：插入元素，使用哈希表的Insert方法。
Find()：查找指定元素，返回指向该元素的迭代器。
Erase()：删除指定元素，返回操作是否成功的布尔值。
私有成员变量：使用哈希表HashTable存储唯一元素。

3.3测试函数

让我们编写一个新的测试函数，展示更多unordered_set的功能，包括插入、查找、删除和遍历操作。

void test_set() {
    unordered_set s;

    // 插入元素
    int a[] = { 4, 2, 6, 1, 3, 5, 15, 7, 16, 14, 3, 3, 15 };
    for (auto e : a) {
        s.insert(e);
    }

    // 查找元素
    auto it = s.Find(6);
    if (it != s.end()) {
        cout << "Found: " << *it << endl;
    } else {
        cout << "Not Found: 6" << endl;
    }

    // 删除元素
    bool erased = s.Erase(7);
    cout << "Element 7 " << (erased ? "was erased." : "not found.") << endl;

    // 遍历unordered_set
    cout << "Contents of the unordered_set:" << endl;
    for (auto it = s.begin(); it != s.end(); ++it) {
        cout << *it << " ";
    }
    cout << endl;

    // 插入更多元素并检查唯一性
    s.insert(10);
    s.insert(2); // 重复插入2，测试唯一性
    s.insert(8);

    // 遍历unordered_set
    cout << "Contents of the unordered_set after more insertions:" << endl;
    for (auto it = s.begin(); it != s.end(); ++it) {
        cout << *it << " ";
    }
    cout << endl;
}