哈希是一种算法,它将指定的数据按一定规律映射到一段空间内,之后又可以按照同样的规律对这些值进行查找等相应的操作,这一段空间称作哈希表。它的查找速度要快于线性的数据结构,同时也快于表格、队列等,所以具有独特的优势,一般将哈希算法用于快速查找和加密算法。
对于最简单的哈希表,里面设置一个key,它决定将这个值存于哈希表的什么位置,同时给表中的每个位置设置一个状态:如果某个位置插入了数据,就将该位置的状态设置为EXITS,其他操作同理。下面实现最简单的哈希表。
namespace First
{
    // Status of one slot in the open-addressing table.
    enum State
    {
        EMPTY,   // slot has never held a key; a probe stops here
        DELETE,  // slot held a key that was removed (tombstone); a probe continues past it
        EXITS    // slot currently holds a live key
    };

    // Fixed-capacity set of keys stored with open addressing and linear probing.
    //
    // T must be usable as `key % size_t`, comparable with operator== and
    // printable with operator<< (required by HashFunc, the lookups and Print).
    // The table never grows: once every slot holds a live key, Insert fails.
    template<class T>
    class HashTable
    {
    public:
        // Builds an empty table with `capacity` slots, all marked EMPTY.
        // The key array is value-initialised so Print never reads
        // indeterminate values.
        HashTable(size_t capacity = 10)
            : _capacity(capacity)
            , _tables(new T[capacity]())
            , _states(new State[capacity])
            , _size(0)
        {
            for (size_t i = 0; i < _capacity; ++i)
            {
                _states[i] = EMPTY;
            }
        }

        // Releases both slot arrays.
        ~HashTable()
        {
            delete[] _tables;
            delete[] _states;
        }

        // Deep copy: duplicates every key and slot state of `h`.
        HashTable(const HashTable& h)
            : _capacity(h._capacity)
            , _tables(new T[h._capacity]())
            , _states(new State[h._capacity])
            , _size(h._size)
        {
            for (size_t i = 0; i < h._capacity; ++i)
            {
                _tables[i] = h._tables[i];
                _states[i] = h._states[i];
            }
        }

        // Copy-and-swap assignment: `h` is already a private copy of the
        // right-hand side, so exchanging contents with it is sufficient and
        // is also safe for self-assignment.
        HashTable& operator=(HashTable h)
        {
            std::swap(_tables, h._tables);
            std::swap(_states, h._states);
            std::swap(_capacity, h._capacity);
            std::swap(_size, h._size);
            return *this;
        }

        // Inserts `key`.
        // Returns true when the key was stored, false when the table is full
        // (a message is printed) or the key is already present.
        bool Insert(const T& key)
        {
            if (_size == _capacity)
            {
                std::cout << "HashTable full" << std::endl;
                return false;
            }

            const size_t start = static_cast<size_t>(HashFunc(key));
            size_t index = start;
            size_t target = _capacity;  // first reusable slot seen; _capacity means "none yet"
            do
            {
                if (_states[index] == EXITS)
                {
                    if (_tables[index] == key)
                    {
                        return false;  // duplicate
                    }
                }
                else
                {
                    if (target == _capacity)
                    {
                        target = index;
                    }
                    if (_states[index] == EMPTY)
                    {
                        break;  // the key cannot live beyond a never-used slot
                    }
                    // Tombstone: keep probing, the key may still be stored further on.
                }
                if (++index == _capacity)
                {
                    index = 0;  // wrap around to the front
                }
            } while (index != start);

            // _size < _capacity guarantees at least one non-live slot, and linear
            // probing visits every slot, so `target` is always set here.
            _tables[target] = key;
            _states[target] = EXITS;
            ++_size;
            return true;
        }

        // Reports (and prints) whether `key` is currently stored.
        bool Find(const T& key) const
        {
            if (_Locate(key) != _capacity)
            {
                std::cout << "find success" << std::endl;
                return true;
            }
            std::cout << "find fail" << std::endl;
            return false;
        }

        // Removes `key` by turning its slot into a tombstone.
        // Returns true when the key was present, false otherwise.
        bool Remove(const T& key)
        {
            const size_t index = _Locate(key);
            if (index == _capacity)
            {
                std::cout << "delete fail" << std::endl;
                return false;
            }
            std::cout << "delete key" << std::endl;
            _states[index] = DELETE;
            --_size;  // keep the live count accurate so Insert can reuse the slot
            return true;
        }

        // Prints every slot as "[key,state] " followed by a newline.
        void Print() const
        {
            for (size_t i = 0; i < _capacity; ++i)
            {
                std::cout << '[' << _tables[i] << ',' << _states[i] << ']' << ' ';
            }
            std::cout << std::endl;
        }

    protected:
        // Home slot of `key`. Must not be called when _capacity is 0.
        int HashFunc(const T& key) const
        {
            return static_cast<int>(key % _capacity);
        }

    private:
        // Returns the index of the live slot holding `key`, or _capacity when
        // the key is absent. Tombstones are skipped; the probe ends at the first
        // EMPTY slot or after one full lap.
        size_t _Locate(const T& key) const
        {
            if (_capacity == 0)
            {
                return _capacity;
            }
            const size_t start = static_cast<size_t>(HashFunc(key));
            size_t index = start;
            do
            {
                if (_states[index] == EMPTY)
                {
                    break;
                }
                if (_states[index] == EXITS && _tables[index] == key)
                {
                    return index;
                }
                if (++index == _capacity)
                {
                    index = 0;
                }
            } while (index != start);
            return _capacity;
        }

        size_t _capacity;  // number of slots
        T* _tables;        // keys, one per slot
        State* _states;    // status of each slot, parallel to _tables
        size_t _size;      // number of live (EXITS) slots
    };
}
/**************************************/
从上面的代码可以看出,这个哈希表并不适用于实际:首先它是静态的,如果存入的key值过多就会造成越界访问;同时它用的是线性探测方法,这样降低了cpu的访问命中率。下面实现一种可以动态扩容、并且可以设置负载因子的哈希表。
namespace Second  // growth is driven by a load factor, which keeps the table sparse
{
    // Status of one slot in the open-addressing table.
    enum State
    {
        EMPTY,   // slot has never held a key; a probe stops here
        DELETE,  // slot held a key that was removed (tombstone); a probe continues past it
        EXITS    // slot currently holds a live key
    };

    // Growable set of keys stored with open addressing and quadratic probing.
    //
    // T must be usable as `key % size_t`, comparable with operator== and
    // printable with operator<<. The table doubles once the load factor
    // reaches 0.6, and also when a probe sequence finds no usable slot.
    template<class T>
    class HashTable
    {
    public:
        // Builds an empty table with `capacity` slots, all marked EMPTY.
        HashTable(size_t capacity = 30)
            : _capacity(capacity)
            , _tables(new T[capacity]())
            , _states(new State[capacity])
            , _size(0)
        {
            for (size_t i = 0; i < _capacity; ++i)
            {
                _states[i] = EMPTY;
            }
        }

        // Releases both slot arrays.
        ~HashTable()
        {
            delete[] _tables;
            delete[] _states;
        }

        // Deep copy: duplicates every key and slot state of `h`.
        HashTable(const HashTable& h)
            : _capacity(h._capacity)
            , _tables(new T[h._capacity]())
            , _states(new State[h._capacity])
            , _size(h._size)
        {
            for (size_t i = 0; i < h._capacity; ++i)
            {
                _tables[i] = h._tables[i];
                _states[i] = h._states[i];
            }
        }

        // Copy-and-swap assignment; `h` is already a private copy.
        HashTable& operator=(HashTable h)
        {
            _swap(h);
            return *this;
        }

        // Earlier linear-probing variant of Insert, kept disabled for comparison:
        //bool Insert(const T& key)
        //{
        //    _CheckCapacity();
        //    int index = _HashFunc(key);
        //    int start = index;
        //    while (_states[index] == EXITS)
        //    {
        //        if (_tables[index] == key)
        //        {
        //            return false;
        //        }
        //        index++;
        //        if (index == _capacity)
        //        {
        //            index = 0;
        //        }
        //        if (index == start)
        //        {
        //            return false;
        //        }
        //    }
        //    _tables[index] = key;
        //    _states[index] = EXITS;
        //    _size++;
        //}

        // Inserts `key` using quadratic probing (offsets 0, 1, 4, 9, ... from the
        // home slot), which spreads colliding keys further apart than linear probing.
        // Returns true when the key was stored, false when it is already present.
        bool Insert(const T& key)
        {
            _CheckCapacity();
            for (;;)
            {
                const int home = _HashFunc(key);
                size_t target = _capacity;  // first reusable slot seen; _capacity means "none yet"
                // i*i mod _capacity repeats with period _capacity, so _capacity
                // probes reach every slot this sequence can ever reach.
                for (size_t i = 0; i < _capacity; ++i)
                {
                    const size_t index =
                        static_cast<size_t>(_HashFuncT(home, static_cast<int>(i)));
                    if (_states[index] == EXITS)
                    {
                        if (_tables[index] == key)
                        {
                            return false;  // duplicate
                        }
                        continue;
                    }
                    if (target == _capacity)
                    {
                        target = index;
                    }
                    if (_states[index] == EMPTY)
                    {
                        break;  // the key cannot live beyond a never-used slot
                    }
                }
                if (target != _capacity)
                {
                    _tables[target] = key;
                    _states[target] = EXITS;
                    ++_size;
                    return true;
                }
                // Every reachable slot holds some other live key: enlarge and retry.
                _Grow();
            }
        }

        // Reports (and prints) whether `key` is currently stored.
        bool Find(const T& key) const
        {
            if (_Locate(key) != _capacity)
            {
                std::cout << "find success" << std::endl;
                return true;
            }
            std::cout << "find fail" << std::endl;
            return false;
        }

        // Removes `key` by turning its slot into a tombstone.
        // Returns true when the key was present, false otherwise.
        bool Remove(const T& key)
        {
            const size_t index = _Locate(key);
            if (index == _capacity)
            {
                return false;
            }
            _states[index] = DELETE;
            --_size;
            return true;
        }

        // Prints every slot as "[key,state] " followed by a newline.
        void Print() const
        {
            for (size_t i = 0; i < _capacity; ++i)
            {
                std::cout << '[' << _tables[i] << ',' << _states[i] << ']' << ' ';
            }
            std::cout << std::endl;
        }

    protected:
        // Slot visited by the i-th quadratic probe starting from `index`.
        // Must not be called when _capacity is 0.
        int _HashFuncT(int index, int i) const
        {
            const size_t step = static_cast<size_t>(i) * static_cast<size_t>(i);
            return static_cast<int>((static_cast<size_t>(index) + step) % _capacity);
        }

        // Home slot of `key`. Must not be called when _capacity is 0.
        int _HashFunc(const T& key) const
        {
            return static_cast<int>(key % _capacity);
        }

        // Enlarges the table when it has no slots at all or when the load
        // factor (_size / _capacity) has reached 0.6.
        void _CheckCapacity()
        {
            if (_capacity == 0 || 10 * _size >= 6 * _capacity)
            {
                _Grow();
            }
        }

        // Exchanges the complete contents of this table and `h`.
        void _swap(HashTable& h)
        {
            std::swap(_tables, h._tables);
            std::swap(_states, h._states);
            std::swap(_capacity, h._capacity);
            std::swap(_size, h._size);
        }

    private:
        // Rehashes every live key into a table twice as large (30 slots when the
        // current table has none) and adopts it. Tombstones are dropped.
        void _Grow()
        {
            HashTable tmp(_capacity == 0 ? 30 : 2 * _capacity);
            for (size_t i = 0; i < _capacity; ++i)
            {
                if (_states[i] == EXITS)
                {
                    tmp.Insert(_tables[i]);
                }
            }
            _swap(tmp);
        }

        // Returns the index of the live slot holding `key`, or _capacity when
        // the key is absent. Follows the same probe sequence as Insert.
        size_t _Locate(const T& key) const
        {
            if (_capacity == 0)
            {
                return _capacity;
            }
            const int home = _HashFunc(key);
            for (size_t i = 0; i < _capacity; ++i)
            {
                const size_t index =
                    static_cast<size_t>(_HashFuncT(home, static_cast<int>(i)));
                if (_states[index] == EMPTY)
                {
                    break;
                }
                if (_states[index] == EXITS && _tables[index] == key)
                {
                    return index;
                }
            }
            return _capacity;
        }

        size_t _capacity;  // number of slots
        T* _tables;        // keys, one per slot
        State* _states;    // status of each slot, parallel to _tables
        size_t _size;      // number of live (EXITS) slots
    };
}
/****************************************/
// Compared with the first version, the code above is already fairly robust for
// the key-only form. The same hashing scheme can also be used to build a
// key/value table that works as a dictionary: the key is one piece of
// information and the value is data attached to it. For example, the key can
// be a student number and the value the class that student belongs to; a
// simple English-Chinese dictionary has the same shape. A simple
// implementation follows.
namespace Third  // supports the dictionary (key/value) form
{
    // Status of one slot in the open-addressing table.
    enum State
    {
        EMPTY,   // slot has never held an entry; a probe stops here
        DELETE,  // slot held an entry that was removed (tombstone)
        EXITS    // slot currently holds a live entry
    };

    // One key/value entry of the dictionary.
    template<class T, class V>
    struct HashTableNode
    {
        // Value-initialises both members so unused slots can be printed safely.
        HashTableNode()
            : _key()
            , _value()
        {}

        HashTableNode(const T& key, const V& value)
            : _key(key)
            , _value(value)
        {}

        T _key;
        V _value;
    };

    // Default hash functor: converts the key itself to size_t.
    template<class T>
    struct __HashFunc
    {
        size_t operator()(const T& key) const
        {
            return key;
        }
    };

    // Growable key/value table using open addressing with quadratic probing.
    //
    // T must be comparable with operator== and accepted by HashFunc; T and V
    // must be printable with operator<< for Print. The table doubles once the
    // load factor reaches 0.6, and also when a probe sequence finds no usable slot.
    template<class T, class V, class HashFunc = __HashFunc<T> >
    class Dictionary
    {
    public:
        // Builds an empty dictionary with `capacity` slots, all marked EMPTY.
        Dictionary(size_t capacity = 10)
            : _capacity(capacity)
            , _tables(new HashTableNode<T, V>[capacity])
            , _states(new State[capacity])
            , _size(0)
        {
            for (size_t i = 0; i < _capacity; ++i)
            {
                _states[i] = EMPTY;
            }
        }

        // Releases both slot arrays.
        ~Dictionary()
        {
            delete[] _tables;
            delete[] _states;
        }

        // Deep copy. Needed because the class owns raw arrays: the
        // compiler-generated copy would share them and free them twice.
        Dictionary(const Dictionary& d)
            : _capacity(d._capacity)
            , _tables(new HashTableNode<T, V>[d._capacity])
            , _states(new State[d._capacity])
            , _size(d._size)
        {
            for (size_t i = 0; i < d._capacity; ++i)
            {
                _tables[i] = d._tables[i];
                _states[i] = d._states[i];
            }
        }

        // Copy-and-swap assignment; `d` is already a private copy.
        Dictionary& operator=(Dictionary d)
        {
            _Swap(d);
            return *this;
        }

        // Inserts the pair (key, value).
        // Returns true when stored, false when `key` is already present
        // (the existing value is left unchanged).
        bool Insert(const T& key, const V& value)
        {
            _CheckCapacity();
            for (;;)
            {
                const size_t home = _HashFunonce(key);
                size_t target = _capacity;  // first reusable slot seen; _capacity means "none yet"
                // i*i mod _capacity repeats with period _capacity, so _capacity
                // probes reach every slot this sequence can ever reach.
                for (size_t i = 0; i < _capacity; ++i)
                {
                    const size_t index =
                        _HashFuntwice(static_cast<int>(home), static_cast<int>(i));
                    if (_states[index] == EXITS)
                    {
                        if (_tables[index]._key == key)
                        {
                            return false;  // duplicate key
                        }
                        continue;
                    }
                    if (target == _capacity)
                    {
                        target = index;
                    }
                    if (_states[index] == EMPTY)
                    {
                        break;  // the key cannot live beyond a never-used slot
                    }
                }
                if (target != _capacity)
                {
                    _tables[target] = HashTableNode<T, V>(key, value);
                    _states[target] = EXITS;
                    ++_size;
                    return true;
                }
                // Every reachable slot holds some other live entry: enlarge and retry.
                _Grow();
            }
        }

        // Looks up `key` and prints the outcome.
        // Returns a pointer to the stored entry, or NULL when the key is absent.
        // The pointer refers to internal storage and is invalidated by any
        // Insert that makes the table grow.
        HashTableNode<T, V>* Find(const T& key)
        {
            const size_t index = _Locate(key);
            if (index == _capacity)
            {
                std::cout << "find fail" << std::endl;
                return NULL;
            }
            std::cout << "find success" << std::endl;
            return _tables + index;
        }

        // Removes the entry for `key` by turning its slot into a tombstone.
        // Returns true when the key was present, false otherwise.
        bool Remove(const T& key)
        {
            const size_t index = _Locate(key);
            if (index == _capacity)
            {
                return false;
            }
            _states[index] = DELETE;
            --_size;
            return true;
        }

        // Prints every slot as "[key,value,state] " followed by a newline.
        void Print() const
        {
            for (size_t i = 0; i < _capacity; ++i)
            {
                std::cout << "[" << _tables[i]._key << "," << _tables[i]._value
                          << "," << _states[i] << "]" << " ";
            }
            std::cout << std::endl;
        }

    protected:
        // Enlarges the table when it has no slots at all or when the load
        // factor (_size / _capacity) has reached 0.6.
        void _CheckCapacity()
        {
            if (_capacity == 0 || 10 * _size >= 6 * _capacity)
            {
                _Grow();
            }
        }

        // Exchanges the complete contents of this dictionary and `tmp`.
        void _Swap(Dictionary& tmp)
        {
            std::swap(_tables, tmp._tables);
            std::swap(_states, tmp._states);
            std::swap(_capacity, tmp._capacity);
            std::swap(_size, tmp._size);
        }

        // Home slot of `key`. Must not be called when _capacity is 0.
        size_t _HashFunonce(const T& key) const
        {
            return HashFunc()(key) % _capacity;
        }

        // Slot visited by the i-th quadratic probe starting from `index`.
        // Must not be called when _capacity is 0.
        size_t _HashFuntwice(int index, int i) const
        {
            const size_t step = static_cast<size_t>(i) * static_cast<size_t>(i);
            return (static_cast<size_t>(index) + step) % _capacity;
        }

    private:
        // Rehashes every live entry into a table twice as large (10 slots when
        // the current table has none) and adopts it. Tombstones are dropped.
        void _Grow()
        {
            Dictionary tmp(_capacity == 0 ? 10 : 2 * _capacity);
            for (size_t i = 0; i < _capacity; ++i)
            {
                if (_states[i] == EXITS)
                {
                    tmp.Insert(_tables[i]._key, _tables[i]._value);
                }
            }
            _Swap(tmp);
        }

        // Returns the index of the live slot holding `key`, or _capacity when
        // the key is absent. Follows the same probe sequence as Insert.
        size_t _Locate(const T& key) const
        {
            if (_capacity == 0)
            {
                return _capacity;
            }
            const size_t home = _HashFunonce(key);
            for (size_t i = 0; i < _capacity; ++i)
            {
                const size_t index =
                    _HashFuntwice(static_cast<int>(home), static_cast<int>(i));
                if (_states[index] == EMPTY)
                {
                    break;
                }
                if (_states[index] == EXITS && _tables[index]._key == key)
                {
                    return index;
                }
            }
            return _capacity;
        }

        size_t _capacity;              // number of slots
        HashTableNode<T, V>* _tables;  // entries, one per slot
        State* _states;                // status of each slot, parallel to _tables
        size_t _size;                  // number of live (EXITS) slots
    };
}

// Demo: quadratic probing, load-factor growth and dictionary-style lookup.
void test3()
{
    /* Variant with string values:
    Third::Dictionary<int, std::string> h1;
    h1.Insert(10, "C basics");
    h1.Insert(59, "C++ basics");
    h1.Insert(9, "data structures");
    h1.Insert(19, "Linux");
    h1.Insert(18, "network programming");*/
    Third::Dictionary<int, int> h1;
    h1.Insert(10, 1);
    h1.Insert(59, 2);
    h1.Insert(9, 3);
    h1.Insert(19, 4);
    h1.Insert(18, 5);
    //h1.Print();
    // NOTE(review): the original output statement here was truncated to "cout<";
    // printing the value stored under key 9 is an assumption - confirm.
    Third::HashTableNode<int, int>* hit = h1.Find(9);
    if (hit != NULL)
    {
        std::cout << hit->_value << std::endl;
    }
    //h1.Remove(9);
    //h1.Remove(19);
    //h1.Remove(10);
    //h1.Print();
}
// That concludes this simple application of hashing.