目录
1.哈希表概念
2.常用哈希函数:
解决哈希冲突的两个方法:
一.闭散列法(开放定址法):
二.开散列法(哈希桶):
3.封装unordered_map,unordered_set
#pragma once
#include <cassert>
#include <cstddef>
#include <string>
#include <utility>
#include <vector>
using std::vector;
using std::pair;
using std::string;
using std::make_pair;
// Closed hashing (open addressing) with linear probing.
//
// Each slot carries a tag so lazily-deleted slots (DELETE) do not break
// probe chains: a probe only stops at a genuinely EMPTY slot.
enum state { EXIST, DELETE, EMPTY };
// Hash table mapping K -> V; K must support `% size_t` (integral keys).
template<class K, class V>
class hashtable
{
    // One table slot: the stored key/value pair plus its occupancy tag.
    struct elem
    {
        std::pair<K, V> _val;
        state _state;
    };
public:
    // Start with a small table; every slot begins EMPTY.
    hashtable(size_t capacity = 3)
        : _ht(capacity)
        , _size(0)
    {
        for (size_t i = 0; i < capacity; i++)
        {
            _ht[i]._state = EMPTY;
        }
    }
    ~hashtable()
    {}
    // Insert val; returns false when an element with the same key EXISTs.
    bool Insert(const std::pair<K, V>& val)
    {
        checkCapacity(); // may rehash; also guarantees _ht is non-empty
        size_t hashi = HashFunc(val.first);
        // Walk forward (wrapping) until a free slot; duplicates are rejected.
        while (_ht[hashi]._state != EMPTY)
        {
            if (_ht[hashi]._state == EXIST
                && _ht[hashi]._val.first == val.first)
                return false;
            ++hashi;
            if (hashi == _ht.size())
                hashi = 0;
        }
        _ht[hashi]._state = EXIST;
        _ht[hashi]._val = val;
        _size++;
        return true;
    }
    // Linear probe for k; returns the slot, or nullptr when absent.
    // DELETE slots are stepped over but do not terminate the probe.
    elem* Find(const K& k)
    {
        if (_ht.size() == 0)
            return nullptr;
        size_t hashi = HashFunc(k);
        while (_ht[hashi]._state != EMPTY)
        {
            if (_ht[hashi]._state == EXIST && _ht[hashi]._val.first == k)
                return &_ht[hashi];
            ++hashi;
            if (hashi == _ht.size())
                hashi = 0;
        }
        return nullptr;
    }
    // Lazy deletion: retag the slot as DELETE so probe chains stay intact.
    // (Was private in the original, making it unusable by callers.)
    bool Erase(const K& k)
    {
        if (elem* p = Find(k))
        {
            p->_state = DELETE;
            --_size;
            return true;
        }
        return false;
    }
private:
    // Grow when the load factor reaches 0.7 (or the table is empty).
    // Reinsert only EXIST elements into a table of twice the size, then
    // take over its storage; DELETE slots are dropped by the rehash.
    void checkCapacity()
    {
        if (_ht.empty() || _size * 10 / _ht.size() >= 7)
        {
            size_t newsize = _ht.empty() ? 3 : _ht.size() * 2;
            hashtable newht(newsize);
            for (const auto& e : _ht)
            {
                if (e._state == EXIST)
                    newht.Insert(e._val);
            }
            _ht.swap(newht._ht);
        }
    }
    size_t HashFunc(const K& k)
    {
        return k % _ht.size();
    }
private:
    std::vector<elem> _ht;
    size_t _size;
};
// Quadratic probing: probe sequence is index+1^2, index+2^2, index+3^2, ...
// (modulo table size), which scatters collisions better than stepping by 1.
// NOTE(review): template arguments (e.g. pair<K, V>) appear to have been lost
// in extraction — restore them before compiling. This is a drop-in
// replacement for the linear-probing Insert of the hashtable class above.
bool Insert(const pair& val)
{
checkCapacity();
size_t hashi = val.first % _ht.size();
// Remember the home slot; each retry jumps i^2 past it.
size_t index = hashi;
int i = 0;
while (_ht[hashi]._state != EMPTY)
{
// Reject duplicates that are still live.
if (_ht[hashi]._state == EXIST
&& _ht[hashi]._val.first == val.first)
return false;
++i;
hashi=index+i*i;
hashi %= _ht.size();
}
_ht[hashi]._state = EXIST;
_ht[hashi]._val = val;
_size++;
return true;
}
// Quadratic probing version of Find: follows the same i^2 probe sequence as
// the quadratic Insert, stopping only at an EMPTY slot.
// NOTE(review): template arguments appear lost in extraction — restore them
// before compiling.
elem* Find(const K& k)
{
if (_ht.size() == 0)
return nullptr;
size_t hashi = k % _ht.size();
// Home slot; retries jump i^2 past it (mod table size).
size_t index = hashi;
int i = 0;
while (_ht[hashi]._state != EMPTY)
{
if (_ht[hashi]._state == EXIST && _ht[hashi]._val.first == k)
return &_ht[hashi];
i++;
hashi=index+i*i;
hashi %= _ht.size();
}
return nullptr;
}
开散列法:首先对关键码的集合通过哈希函数映射到对应地址,具有相同地址的关键码归于同一个子集合,每一个子集合称为一个桶,各个桶中的元素通过一个单链表链接起来,各链表的头结点存储在哈希表中。(即将哈希冲突的元素放在单链表中)
简单实现:
// Open hashing ("hash buckets" / separate chaining): elements that collide
// in a bucket are kept on a singly linked list whose head lives in _ht.
namespace openhash
{
    // One list node holding a value of type V.
    template<class V>
    struct hashBucketNode
    {
        hashBucketNode(const V& data)
            : _next(nullptr)
            , _data(data)
        {}
        hashBucketNode* _next;
        V _data;
    };
    // Chained hash table of V; V must support `% size_t` and `==`.
    template<class V>
    class hashBucket
    {
        typedef hashBucketNode<V> Node;
    public:
        hashBucket(size_t capacity = 3)
            : _size(0)
        {
            _ht.resize(capacity, nullptr);
        }
        // Insert data; if already present return the existing node,
        // otherwise push a new node at the front of its bucket.
        Node* insert(const V& data)
        {
            // Which bucket does data belong to?
            size_t Bucketi = hashFunc(data);
            Node* cur = _ht[Bucketi];
            while (cur)
            {
                if (cur->_data == data)
                    return cur;
                cur = cur->_next;
            }
            Node* newnode = new Node(data);
            newnode->_next = _ht[Bucketi];
            _ht[Bucketi] = newnode;
            _size++;
            return newnode;
        }
        // Unlink and delete data's node; returns the node that followed it
        // (nullptr when data was absent or was last in its bucket).
        Node* erase(const V& data)
        {
            size_t Bucketi = hashFunc(data);
            Node* cur = _ht[Bucketi];
            Node* prev = nullptr; // fix: was `Node prev`, not a pointer
            while (cur)
            {
                if (cur->_data == data)
                {
                    if (prev)
                        prev->_next = cur->_next;
                    else
                        _ht[Bucketi] = cur->_next;
                    // Return the element after the erased one.
                    Node* next = cur->_next;
                    delete cur;
                    --_size;
                    return next;
                }
                // fix: the original never advanced here, so any miss in a
                // non-empty bucket spun forever.
                prev = cur;
                cur = cur->_next;
            }
            return nullptr;
        }
        // Returns the node holding data, or nullptr.
        Node* Find(const V& data)
        {
            size_t Bucketi = hashFunc(data);
            Node* cur = _ht[Bucketi];
            while (cur)
            {
                if (cur->_data == data)
                    return cur;
                cur = cur->_next;
            }
            return nullptr;
        }
        size_t size() const { return _size; }
        bool Empty() const { return _size == 0; }
        // Delete every node in every bucket; bucket count is kept.
        void clear()
        {
            for (size_t i = 0; i < _ht.size(); i++)
            {
                while (_ht[i])
                {
                    Node* cur = _ht[i];
                    _ht[i] = cur->_next;
                    delete cur;
                }
            }
            _size = 0;
        }
        ~hashBucket() { clear(); }
    private:
        size_t hashFunc(const V& data) { return data % _ht.size(); }
    private:
        std::vector<Node*> _ht;
        size_t _size;
    };
}
开散列法的扩容:
因为扩容时桶的总数改变了,每个元素需要重新映射地址,所以有大的性能消耗。理想情况下是每个桶一个元素,于是,可以在桶的个数等于存储的元素个数的时候进行哈希表的扩容操作。
扩容代码:(将原链表节点一个个插入到新的哈希桶)
// Number of non-empty buckets; used to decide when to grow the table.
size_t BucketCount()const
{
size_t bc = 0;
for (auto p : _ht)
if (p)++bc;
return bc;
}
private:
// Grow the bucket array when (ideally) every bucket holds one element,
// i.e. when the non-empty bucket count equals the element count.
// Rehashing relinks the existing nodes into the new table — no new nodes
// are allocated.
// NOTE(review): template/type arguments appear lost in extraction
// (`vectornewht` was presumably `vector<Node*> newht`) — restore before
// compiling.
void checkCapacity()
{
if (BucketCount() == _size)
{
vectornewht(_size == 0 ? 3:_size * 2);
// p is a reference to the bucket head, so advancing p pops nodes
// off the old bucket while pnode carries each one to its new home.
for (auto& p : _ht)
{
Node* pnode = p;
while (pnode)
{
p = p->_next;
size_t Bucketi = pnode->_data % newht.size();
// Head-insert the node into its new bucket.
pnode->_next = newht[Bucketi];
newht[Bucketi] = pnode;
pnode = p;
}
}
_ht.swap(newht);
}
}
//hashtable11-3.h
namespace openhash
{
// Default hash functor: use the value itself (suits integral keys).
template<class T>
struct HashFunc
{
    size_t operator()(const T& val) { return val; }
};
// Strings need a dedicated hash -> template specialization.
// Polynomial (BKDR-style) hash with seed 31.
template<>
struct HashFunc<std::string>
{
    size_t operator()(const std::string& s)
    {
        const char* str = s.c_str();
        size_t seed = 31;
        size_t hash = 0;
        while (*str)
        {
            hash = hash * seed + (*str++);
        }
        return hash;
    }
};
// List node: stores a T, which may be just K (set) or pair<K, V> (map).
template<class T>
struct HashNode
{
    HashNode(const T& data)
        : _next(nullptr)
        , _data(data)
    {}
    HashNode* _next;
    T _data;
};
// The iterator needs the bucket table, which is declared below it ->
// forward declaration.
template<class K, class T, class KeyOfT, class HashFunc>
class HashBucket;
// Forward iterator over a HashBucket: holds the current node plus a pointer
// back to the table so it can hop to the next non-empty bucket.
template<class K, class T, class KeyOfT, class HashFunc>
class __HBIterator
{
    typedef HashNode<T> Node;
    typedef HashBucket<K, T, KeyOfT, HashFunc>* Phb;
    typedef __HBIterator<K, T, KeyOfT, HashFunc> Self;
public:
    __HBIterator() {}
    __HBIterator(Node* node, Phb phb)
        : _node(node), _phb(phb)
    {}
    // A forward (single-direction) iterator only needs ++.
    Self& operator++() // pre-increment
    {
        assert(_node); // _node == nullptr is end(); it must not be incremented
        if (_node->_next)
            _node = _node->_next;
        else
        {
            KeyOfT kot;
            HashFunc hf;
            size_t sz = _phb->_table.size();
            // Index of the bucket after the current node's bucket.
            size_t hashi = hf(kot(_node->_data)) % sz + 1;
            // Skip over empty buckets.
            while (hashi < sz && !_phb->_table[hashi])
                ++hashi;
            if (hashi == sz)
                _node = nullptr; // walked past the last bucket -> end()
            else
                _node = _phb->_table[hashi];
        }
        return *this;
    }
    // fix: must return by value — the original returned Self& to the local
    // `ret`, a dangling reference.
    Self operator++(int) // post-increment
    {
        Self ret(_node, _phb);
        operator++();
        return ret;
    }
    T& operator*() { return _node->_data; }
    T* operator->() { return &_node->_data; }
    bool operator!=(const Self& s) const { return _node != s._node; }
    bool operator==(const Self& s) const { return _node == s._node; }
private:
    Node* _node;
    Phb _phb;
};
template
class HashBucket
{
template
friend class __HBIterator;//将迭代器作为哈希桶的友元类模板,以便能访问哈希表(遍历桶)
typedef HashNode Node;
public:
typedef __HBIterator Iterator;
Iterator begin()
{
for (auto p : _table)
if (p)return Iterator(p, this);
return Iterator(nullptr, this);
}
Iterator end() { return Iterator(nullptr, this); }
HashBucket(size_t capacity = 0)
:_table(GetNextPrime(_table.size()),nullptr)
,_size(0)
{}
~HashBucket() { Clear(); }
void Clear()
{
for (size_t i = 0; i < _table.size(); i++)
{
while (_table[i])
{
Node* cur = _table[i];
_table[i] = cur->_next;
delete cur;
}
}
_size = 0;
}
Iterator find(const K& key)
{
HashFunc hf;
size_t hashi = hf(key) % _table.size();
Node* cur = _table[hashi];
while (cur)
{
if (KeyOfT()(cur->_data) == key)
return Iterator(cur, this);
cur = cur->_next;
}
return Iterator(nullptr, this);
}
pair insert(const T& data)
{
Iterator it = find(KeyOfT()(data));
if (it != end())
return make_pair(it, false);
checkCapacity();
Node* newnode = new Node(data);
size_t hashi = HashFunc()(KeyOfT()(data)) % _table.size();
newnode->_next = _table[hashi];
_table[hashi] = newnode;
++_size;
return make_pair(Iterator(newnode, this), true);
}
size_t erase(const K& key)
{
KeyOfT kot;
HashFunc hf;
size_t hashi = hf(key)%_table.size();
Node* prev = nullptr;
Node* cur = _table[hashi];
while (cur)
{
if (kot(cur->_data) == key)
{
if (prev)
prev->_next = cur->_next;
else
_table[hashi] = cur->_next;
delete cur;
--_size;
return 1;
}
prev = cur;
cur = cur->_next;
}
return 0;
}
private:
//将桶的数量设置为一个素数,据说能减少哈希冲突
size_t GetNextPrime(size_t prime)
{
const int PRIMECOUNT = 28;
//素数表
static const size_t primeList[PRIMECOUNT] =
{
53ul, 97ul, 193ul, 389ul, 769ul,
1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
1610612741ul, 3221225473ul, 4294967291ul
};
// 获取比prime大那一个素数
size_t i = 0;
for (; i < PRIMECOUNT; ++i)
{
if (primeList[i] > prime)
return primeList[i];
}
return primeList[i];
}
void checkCapacity()
{
if (_size == _table.size())
{
KeyOfT kot;
HashFunc hf;
vectornewtable(GetNextPrime(_table.size()),nullptr);
for (auto p : _table)
{
auto cur = p;
while (cur)
{
Node* next = cur->_next;
cur->_next = newtable[hf(kot(p->_data))];
newtable[hf(kot(p->_data))] = cur;
cur = next;
}
}
newtable.swap(_table);
}
}
private:
vector_table;
size_t _size;
};
}
#pragma once
#include"hashtable11-3.h"
namespace openhash
{
template >
class unordered_map
{
struct MapKeyOfT { const K& operator()(const pair& p) { return p.first; } };
public:
typedef typename HashBucket, MapKeyOfT, HashFunc>::Iterator iterator;
iterator begin() { return _hb.begin(); }
iterator end() { return _hb.end(); }
iterator find(const K& key) {
return _hb.find(key);
}
size_t erase(const K& key) {
return _hb.erase(key);
}
pairinsert(const pair& p) {
return _hb.insert(p);
}
V& operator[](const K& k) {
pairp = insert(make_pair(k, V()));
return p.first->second;
}
private:
HashBucket, MapKeyOfT, HashFunc>_hb;
};
}
#pragma once
// fix: the #include sat inside `namespace openhash`, which would nest the
// header's own openhash namespace as openhash::openhash — hoist it out.
#include"hashtable11-3.h"
namespace openhash
{
    // Set wrapper over HashBucket: stores K directly; the key IS the data.
    template<class K, class Hash = HashFunc<K>>
    class unordered_set
    {
        struct SetKeyOfT { const K& operator()(const K& k) { return k; } };
    public:
        typedef typename HashBucket<K, K, SetKeyOfT, Hash>::Iterator iterator;
        iterator begin() { return _hb.begin(); }
        iterator end() { return _hb.end(); }
        iterator find(const K& key) {
            return _hb.find(key);
        }
        size_t erase(const K& key) {
            return _hb.erase(key);
        }
        std::pair<iterator, bool> insert(const K& k) {
            return _hb.insert(k);
        }
    private:
        HashBucket<K, K, SetKeyOfT, Hash> _hb;
    };
}