hash_set的底层数据结构是哈希表,因此要深入了解hash_set,必须先分析哈希表。哈希表是根据关键码值(Key value)直接进行访问的数据结构:它用哈希函数处理数据得到关键码值,关键码值对应表中的一个特定位置,再由该位置来访问记录,这样平均可以在O(1)的时间复杂度内访问到数据。但是很有可能出现多个数据经哈希函数处理后得到同一个关键码值——这就产生了冲突。解决冲突的方法有很多,这里采用最方便、最有效的一种——链地址法:当有冲突发生时,将具有同一关键码值的数据组成一个链表。下图展示了链地址法的使用:
前面已经介绍过哈希表了,这里直接看hash_set的源码:
// Filename: stl_hash_set.h // Comment By: 凝霜 // E-mail: [email protected] // Blog: http://blog.csdn.net/mdl13412 // hash_set和hash_multiset是对hashtable的简单包装, 很容易理解 /* * Copyright (c) 1996 * Silicon Graphics Computer Systems, Inc. * * Permission to use, copy, modify, distribute and sell this software * and its documentation for any purpose is hereby granted without fee, * provided that the above copyright notice appear in all copies and * that both that copyright notice and this permission notice appear * in supporting documentation. Silicon Graphics makes no * representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied warranty. * * * Copyright (c) 1994 * Hewlett-Packard Company * * Permission to use, copy, modify, distribute and sell this software * and its documentation for any purpose is hereby granted without fee, * provided that the above copyright notice appear in all copies and * that both that copyright notice and this permission notice appear * in supporting documentation. Hewlett-Packard Company makes no * representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied warranty. * */ /* NOTE: This is an internal header file, included by other STL headers. * You should not attempt to use it directly. 
*/ #ifndef __SGI_STL_INTERNAL_HASH_SET_H #define __SGI_STL_INTERNAL_HASH_SET_H __STL_BEGIN_NAMESPACE #if defined(__sgi) && !defined(__GNUC__) && (_MIPS_SIM != _MIPS_SIM_ABI32) #pragma set woff 1174 #endif // 如果编译器不能根据前面模板参数推导出后面使用的默认参数类型, // 那么就需要手工指定, 并且对于基本的数据类型, 在<stl_hash_fun.h> // 中都提供hash函数 #ifndef __STL_LIMITED_DEFAULT_TEMPLATES template <class Value, class HashFcn = hash<Value>, class EqualKey = equal_to<Value>, class Alloc = alloc> #else template <class Value, class HashFcn, class EqualKey, class Alloc = alloc> #endif class hash_set { private: // identity<Value>用于析出Value typedef hashtable<Value, Value, HashFcn, identity<Value>, EqualKey, Alloc> ht; ht rep; // 其实hash_set就是hashtable的简单封装 public: typedef typename ht::key_type key_type; typedef typename ht::value_type value_type; typedef typename ht::hasher hasher; typedef typename ht::key_equal key_equal; // 注意: reference, pointer, iterator都为const, 因为不能修改hashtable // 内部的元素, 否则会导致hashtable失效 typedef typename ht::size_type size_type; typedef typename ht::difference_type difference_type; typedef typename ht::const_pointer pointer; typedef typename ht::const_pointer const_pointer; typedef typename ht::const_reference reference; typedef typename ht::const_reference const_reference; typedef typename ht::const_iterator iterator; typedef typename ht::const_iterator const_iterator; // 返回hash相关函数 hasher hash_funct() const { return rep.hash_funct(); } key_equal key_eq() const { return rep.key_eq(); } public: hash_set() : rep(100, hasher(), key_equal()) {} explicit hash_set(size_type n) : rep(n, hasher(), key_equal()) {} hash_set(size_type n, const hasher& hf) : rep(n, hf, key_equal()) {} hash_set(size_type n, const hasher& hf, const key_equal& eql) : rep(n, hf, eql) {} #ifdef __STL_MEMBER_TEMPLATES template <class InputIterator> hash_set(InputIterator f, InputIterator l) : rep(100, hasher(), key_equal()) { rep.insert_unique(f, l); } template <class InputIterator> hash_set(InputIterator f, InputIterator l, size_type n) : 
rep(n, hasher(), key_equal()) { rep.insert_unique(f, l); } template <class InputIterator> hash_set(InputIterator f, InputIterator l, size_type n, const hasher& hf) : rep(n, hf, key_equal()) { rep.insert_unique(f, l); } template <class InputIterator> hash_set(InputIterator f, InputIterator l, size_type n, const hasher& hf, const key_equal& eql) : rep(n, hf, eql) { rep.insert_unique(f, l); } #else hash_set(const value_type* f, const value_type* l) : rep(100, hasher(), key_equal()) { rep.insert_unique(f, l); } hash_set(const value_type* f, const value_type* l, size_type n) : rep(n, hasher(), key_equal()) { rep.insert_unique(f, l); } hash_set(const value_type* f, const value_type* l, size_type n, const hasher& hf) : rep(n, hf, key_equal()) { rep.insert_unique(f, l); } hash_set(const value_type* f, const value_type* l, size_type n, const hasher& hf, const key_equal& eql) : rep(n, hf, eql) { rep.insert_unique(f, l); } hash_set(const_iterator f, const_iterator l) : rep(100, hasher(), key_equal()) { rep.insert_unique(f, l); } hash_set(const_iterator f, const_iterator l, size_type n) : rep(n, hasher(), key_equal()) { rep.insert_unique(f, l); } hash_set(const_iterator f, const_iterator l, size_type n, const hasher& hf) : rep(n, hf, key_equal()) { rep.insert_unique(f, l); } hash_set(const_iterator f, const_iterator l, size_type n, const hasher& hf, const key_equal& eql) : rep(n, hf, eql) { rep.insert_unique(f, l); } #endif /*__STL_MEMBER_TEMPLATES */ public: // 下面都是对hashtable的简单封装, 见<stl_hashtable.h> size_type size() const { return rep.size(); } size_type max_size() const { return rep.max_size(); } bool empty() const { return rep.empty(); } void swap(hash_set& hs) { rep.swap(hs.rep); } friend bool operator== __STL_NULL_TMPL_ARGS (const hash_set&, const hash_set&); iterator begin() const { return rep.begin(); } iterator end() const { return rep.end(); } public: pair<iterator, bool> insert(const value_type& obj) { pair<typename ht::iterator, bool> p = rep.insert_unique(obj); r 
eturn pair<iterator, bool>(p.first, p.second); } #ifdef __STL_MEMBER_TEMPLATES template <class InputIterator> void insert(InputIterator f, InputIterator l) { rep.insert_unique(f,l); } #else void insert(const value_type* f, const value_type* l) { rep.insert_unique(f,l); } void insert(const_iterator f, const_iterator l) {rep.insert_unique(f, l); } #endif /*__STL_MEMBER_TEMPLATES */ // hash_set和set一样, 都不允许key重复 pair<iterator, bool> insert_noresize(const value_type& obj) { pair<typename ht::iterator, bool> p = rep.insert_unique_noresize(obj); return pair<iterator, bool>(p.first, p.second); } iterator find(const key_type& key) const { return rep.find(key); } size_type count(const key_type& key) const { return rep.count(key); } pair<iterator, iterator> equal_range(const key_type& key) const { return rep.equal_range(key); } size_type erase(const key_type& key) {return rep.erase(key); } void erase(iterator it) { rep.erase(it); } void erase(iterator f, iterator l) { rep.erase(f, l); } void clear() { rep.clear(); } public: void resize(size_type hint) { rep.resize(hint); } size_type bucket_count() const { return rep.bucket_count(); } size_type max_bucket_count() const { return rep.max_bucket_count(); } size_type elems_in_bucket(size_type n) const { return rep.elems_in_bucket(n); } }; template <class Value, class HashFcn, class EqualKey, class Alloc> inline bool operator==(const hash_set<Value, HashFcn, EqualKey, Alloc>& hs1, const hash_set<Value, HashFcn, EqualKey, Alloc>& hs2) { return hs1.rep == hs2.rep; } // 如果编译器支持模板函数特化优先级 // 那么将全局的swap实现为使用hash_set私有的swap以提高效率 #ifdef __STL_FUNCTION_TMPL_PARTIAL_ORDER template <class Val, class HashFcn, class EqualKey, class Alloc> inline void swap(hash_set<Val, HashFcn, EqualKey, Alloc>& hs1, hash_set<Val, HashFcn, EqualKey, Alloc>& hs2) { hs1.swap(hs2); } #endif /* __STL_FUNCTION_TMPL_PARTIAL_ORDER */ // hash_multiset和hash_set除去允许key重复外, 其余性质一致 #ifndef __STL_LIMITED_DEFAULT_TEMPLATES template <class Value, class HashFcn = hash<Value>, 
class EqualKey = equal_to<Value>, class Alloc = alloc> #else template <class Value, class HashFcn, class EqualKey, class Alloc = alloc> #endif class hash_multiset { private: typedef hashtable<Value, Value, HashFcn, identity<Value>, EqualKey, Alloc> ht; ht rep; public: typedef typename ht::key_type key_type; typedef typename ht::value_type value_type; typedef typename ht::hasher hasher; typedef typename ht::key_equal key_equal; typedef typename ht::size_type size_type; typedef typename ht::difference_type difference_type; typedef typename ht::const_pointer pointer; typedef typename ht::const_pointer const_pointer; typedef typename ht::const_reference reference; typedef typename ht::const_reference const_reference; typedef typename ht::const_iterator iterator; typedef typename ht::const_iterator const_iterator; hasher hash_funct() const { return rep.hash_funct(); } key_equal key_eq() const { return rep.key_eq(); } public: hash_multiset() : rep(100, hasher(), key_equal()) {} explicit hash_multiset(size_type n) : rep(n, hasher(), key_equal()) {} hash_multiset(size_type n, const hasher& hf) : rep(n, hf, key_equal()) {} hash_multiset(size_type n, const hasher& hf, const key_equal& eql) : rep(n, hf, eql) {} #ifdef __STL_MEMBER_TEMPLATES template <class InputIterator> hash_multiset(InputIterator f, InputIterator l) : rep(100, hasher(), key_equal()) { rep.insert_equal(f, l); } template <class InputIterator> hash_multiset(InputIterator f, InputIterator l, size_type n) : rep(n, hasher(), key_equal()) { rep.insert_equal(f, l); } template <class InputIterator> hash_multiset(InputIterator f, InputIterator l, size_type n, const hasher& hf) : rep(n, hf, key_equal()) { rep.insert_equal(f, l); } template <class InputIterator> hash_multiset(InputIterator f, InputIterator l, size_type n, const hasher& hf, const key_equal& eql) : rep(n, hf, eql) { rep.insert_equal(f, l); } #else hash_multiset(const value_type* f, const value_type* l) : rep(100, hasher(), key_equal()) { 
rep.insert_equal(f, l); } hash_multiset(const value_type* f, const value_type* l, size_type n) : rep(n, hasher(), key_equal()) { rep.insert_equal(f, l); } hash_multiset(const value_type* f, const value_type* l, size_type n, const hasher& hf) : rep(n, hf, key_equal()) { rep.insert_equal(f, l); } hash_multiset(const value_type* f, const value_type* l, size_type n, const hasher& hf, const key_equal& eql) : rep(n, hf, eql) { rep.insert_equal(f, l); } hash_multiset(const_iterator f, const_iterator l) : rep(100, hasher(), key_equal()) { rep.insert_equal(f, l); } hash_multiset(const_iterator f, const_iterator l, size_type n) : rep(n, hasher(), key_equal()) { rep.insert_equal(f, l); } hash_multiset(const_iterator f, const_iterator l, size_type n, const hasher& hf) : rep(n, hf, key_equal()) { rep.insert_equal(f, l); } hash_multiset(const_iterator f, const_iterator l, size_type n, const hasher& hf, const key_equal& eql) : rep(n, hf, eql) { rep.insert_equal(f, l); } #endif /*__STL_MEMBER_TEMPLATES */ public: size_type size() const { return rep.size(); } size_type max_size() const { return rep.max_size(); } bool empty() const { return rep.empty(); } void swap(hash_multiset& hs) { rep.swap(hs.rep); } friend bool operator== __STL_NULL_TMPL_ARGS (const hash_multiset&, const hash_multiset&); iterator begin() const { return rep.begin(); } iterator end() const { return rep.end(); } public: iterator insert(const value_type& obj) { return rep.insert_equal(obj); } #ifdef __STL_MEMBER_TEMPLATES template <class InputIterator> void insert(InputIterator f, InputIterator l) { rep.insert_equal(f,l); } #else void insert(const value_type* f, const value_type* l) { rep.insert_equal(f,l); } void insert(const_iterator f, const_iterator l) { rep.insert_equal(f, l); } #endif /*__STL_MEMBER_TEMPLATES */ iterator insert_noresize(const value_type& obj) { return rep.insert_equal_noresize(obj); } iterator find(const key_type& key) const { return rep.find(key); } size_type count(const key_type& key) 
const { return rep.count(key); } pair<iterator, iterator> equal_range(const key_type& key) const { return rep.equal_range(key); } size_type erase(const key_type& key) {return rep.erase(key); } void erase(iterator it) { rep.erase(it); } void erase(iterator f, iterator l) { rep.erase(f, l); } void clear() { rep.clear(); } public: void resize(size_type hint) { rep.resize(hint); } size_type bucket_count() const { return rep.bucket_count(); } size_type max_bucket_count() const { return rep.max_bucket_count(); } size_type elems_in_bucket(size_type n) const { return rep.elems_in_bucket(n); } }; template <class Val, class HashFcn, class EqualKey, class Alloc> inline bool operator==(const hash_multiset<Val, HashFcn, EqualKey, Alloc>& hs1, const hash_multiset<Val, HashFcn, EqualKey, Alloc>& hs2) { return hs1.rep == hs2.rep; } // 如果编译器支持模板函数特化优先级 // 那么将全局的swap实现为使用hash_multiset私有的swap以提高效率 #ifdef __STL_FUNCTION_TMPL_PARTIAL_ORDER template <class Val, class HashFcn, class EqualKey, class Alloc> inline void swap(hash_multiset<Val, HashFcn, EqualKey, Alloc>& hs1, hash_multiset<Val, HashFcn, EqualKey, Alloc>& hs2) { hs1.swap(hs2); } #endif /* __STL_FUNCTION_TMPL_PARTIAL_ORDER */ #if defined(__sgi) && !defined(__GNUC__) && (_MIPS_SIM != _MIPS_SIM_ABI32) #pragma reset woff 1174 #endif __STL_END_NAMESPACE #endif /* __SGI_STL_INTERNAL_HASH_SET_H */ // Local Variables: // mode:C++ // End:<span style="font-family:Microsoft YaHei;font-size:18px;"> </span>
示例1:
#include<iostream> #include<string> #include<iterator> #include<algorithm> #include<hash_set> #include<windows.h> using namespace std; void main(){ const int N= 3; int s1[N]={1,2,3}; int s2[N]={4,5,6}; hash_set<int>sa(s1,s1+N);//申明一个集合sa,元素为数组s1 hash_set<int>sb(s2,s2+N);//申明一个集合sb,元素为数组s2 hash_set<int> sc;//申明一个集合sc,为空集合 ostream_iterator<int> output(cout," "); cout<<"输出集合sa的元素:"; copy(sa.begin(),sa.end(),output); cout<<"\n\n输出集合sb的元素:"; copy(sb.begin(),sb.end(),output); cout<<endl; system("pause"); }
示例2:
/* * ************************************************************************************ * hash_set哈希集合容器的基础说明: ************************************************************************************ * * hash_set哈希集合容器:使用hashtable数据结构的具有高效数据检索的关联容器 * * 不提供反向迭代器,只有前向迭代器iterator和const_iterator * 不允许插入重复的元素键值 * Hashed Associative Container Simple Associative Container Unique Associative Container * * 目前还不是C++的标准容器,只是SGI C++ STL的一个扩展容器 * 使用hash_set必须使用宏语句#include <hash_set> * ************************************************************************************** * * 创建hash_set对象: * 1.hash_set<int> hs; //键值比较使用默认的函数对象equal_to<Value> * 2.hash_set(size_type n); //在质数列表中找出第一个大于等于n的质数作为表长:hash_set<int> hs(100); * 3.hash_set(size_type n,const hasher& h); //hash函数对象为h * 4.hash_set(size_type n,const hasher& h,const key_equal& k);//键值比较函数对象k * 5.hash_set(const hash_set& h); //用一个hash集合容器拷贝生成另一个hash集合容器:hash_set<int> hs2(hs); * ************************************************************************************** * * 元素的插入 * //typedef pair<const key,T> value_type; * pair<iterator,bool> insert(const value_type& v);//second:返回true/false插入成功标志 * void insert(iterator pos,const value_type& v); * ************************************************************************************** * * 元素的删除 * void erase(iterator pos); * size_type erase(const key_type& k); //删除等于键值k的元素 * void erase(first,last); //删除[first,last)区间的元素 * void clear(); * ************************************************************************************** * * 访问与搜索 * * iterator begin();iterator end(); //不会将元素排序遍历出来 * * iterator find(const key_type& k) const; //对于非默认类型如char*,在搜素时应定义相关的函数对象 * * 其它常用函数 * bool empty() const; * size_type size() const; * size_type bucket_count(const key_type& k) const; //获得hash表的表长 * void swap(); * resize(); * iterator lower_bound();iterator upper_bound();pair<iterator,iterator> equal_range();//上界、下届、确定区间 * * 在SGI STL中,提供了以下hash函数: * struct hash<char*> * struct hash<const char*> * struct 
hash<char> * struct hash<unsigned char> * struct hash<signed char> * struct hash<short> * struct hash<unsigned short> * struct hash<int> * struct hash<unsigned int> * struct hash<long> * struct hash<unsigned long> * * hash函数决定了如何划分散列表 * * * ******************************************** ** cumirror ** [email protected] ** ** ******************************************** * */ #include <hash_set> #include <iostream> struct student{ char* name; int age; char* city; char* phone; }; //自定义数据的比较函数 class stuequal{ public: bool operator() (const student& a,const student& b){ return strcmp(a.city,b.city)==0; //不允许同名,name为键值 } //将name换为city测试下 }; //自定义数据的hash函数 //typedef unsigned int size_t; struct stu_hash{ size_t operator()(const student& stu) const { unsigned long res = 0; char* s=stu.city; for( ; *s; ++s ){ res=5*res+*s; } return size_t(res); } }; //针对字符串的比较函数对象 class strequal{ public: bool operator () (const char* a,const char* b)const{ return strcmp(a,b)==0; } }; int main(){ using namespace std; hash_set<const char*,hash<const char*>,strequal> a; a.insert("tongjin"); a.insert("cumirror"); a.insert("makelaugh"); a.insert("feiguodeyun"); // hash<const char*>默认提供的hash函数对象 hash_set<const char*,hash<const char*>,strequal>::const_iterator b=a.find("tongjin"); cout<<*b<<" is "<<(b!=a.end()?"present":"not present")<<endl; // 对于自定义类型数据,使用hash相关容器时应构造hash函数对象、比较函数对象 // 注意区别hash函数对象与比较函数对象各自的作用 student s[]={ {"童进",23,"长沙","XXX"}, {"老大",23,"武汉","XXX"}, {"饺子",23,"福州","XXX"}, {"王老虎",23,"地球","XXX"}, {"周润发",23,"香港","XXX"}, {"周星星",23,"香港","XXX"}, //city重复 {"童进",23,"香港","XXX"} //name重复、city也有重复 }; hash_set<student,stu_hash,stuequal> c; c.insert(s[0]); c.insert(s[1]); c.insert(s[2]); c.insert(s[3]); c.insert(s[4]); c.insert(s[5]); c.insert(s[6]); // 注意hash容器并不能实现排序 for(hash_set<student,stu_hash,stuequal>::iterator i=c.begin();i!=c.end();i++){ cout<<i->name<<" "<<i->age<<" "<<i->city<<endl; } return 0; }