STL源码剖析 [容器](二十)[stl_hash_set.h]



   hash_set的底层数据结构是哈希表,因此要深入了解hash_set,必须先分析哈希表。哈希表是根据关键码值(Key-Value)而直接进行访问的数据结构,它用哈希函数处理数据得到关键码值,关键码值对应表中一个特定位置,再由该位置来访问记录,这样平均可以在O(1)的时间复杂度内访问到数据。但是很有可能出现多个数据经哈希函数处理后得到同一个关键码——这就产生了冲突。解决冲突的方法有很多,这里采用最方便有效的一种——链地址法:当有冲突发生时,将具有同一关键码的数据组成一个链表。下图展示了链地址法的使用:

STL源码剖析 [容器](二十)[stl_hash_set.h]_第1张图片

前面已经介绍过哈希表了,这里直接看hash_set的源码:

// Filename:    stl_hash_set.h

// Comment By:  凝霜
// E-mail:      [email protected]
// Blog:        http://blog.csdn.net/mdl13412

// hash_set和hash_multiset是对hashtable的简单包装, 很容易理解

/*
 * Copyright (c) 1996
 * Silicon Graphics Computer Systems, Inc.
 *
 * Permission to use, copy, modify, distribute and sell this software
 * and its documentation for any purpose is hereby granted without fee,
 * provided that the above copyright notice appear in all copies and
 * that both that copyright notice and this permission notice appear
 * in supporting documentation.  Silicon Graphics makes no
 * representations about the suitability of this software for any
 * purpose.  It is provided "as is" without express or implied warranty.
 *
 *
 * Copyright (c) 1994
 * Hewlett-Packard Company
 *
 * Permission to use, copy, modify, distribute and sell this software
 * and its documentation for any purpose is hereby granted without fee,
 * provided that the above copyright notice appear in all copies and
 * that both that copyright notice and this permission notice appear
 * in supporting documentation.  Hewlett-Packard Company makes no
 * representations about the suitability of this software for any
 * purpose.  It is provided "as is" without express or implied warranty.
 *
 */

/* NOTE: This is an internal header file, included by other STL headers.
 *   You should not attempt to use it directly.
 */

#ifndef __SGI_STL_INTERNAL_HASH_SET_H
#define __SGI_STL_INTERNAL_HASH_SET_H

__STL_BEGIN_NAMESPACE

#if defined(__sgi) && !defined(__GNUC__) && (_MIPS_SIM != _MIPS_SIM_ABI32)
#pragma set woff 1174
#endif

// 如果编译器不能根据前面模板参数推导出后面使用的默认参数类型,
// 那么就需要手工指定, 并且对于基本的数据类型, 在<stl_hash_fun.h>
// 中都提供hash函数
#ifndef __STL_LIMITED_DEFAULT_TEMPLATES
template <class Value, class HashFcn = hash<Value>,
          class EqualKey = equal_to<Value>,
          class Alloc = alloc>
#else
template <class Value, class HashFcn, class EqualKey, class Alloc = alloc>
#endif
class hash_set
{
private:
  // identity<Value>用于析出Value
  typedef hashtable<Value, Value, HashFcn, identity<Value>,
                    EqualKey, Alloc> ht;
  ht rep;       // 其实hash_set就是hashtable的简单封装

public:
  typedef typename ht::key_type key_type;
  typedef typename ht::value_type value_type;
  typedef typename ht::hasher hasher;
  typedef typename ht::key_equal key_equal;

  // 注意: reference, pointer, iterator都为const, 因为不能修改hashtable
  // 内部的元素, 否则会导致hashtable失效
  typedef typename ht::size_type size_type;
  typedef typename ht::difference_type difference_type;
  typedef typename ht::const_pointer pointer;
  typedef typename ht::const_pointer const_pointer;
  typedef typename ht::const_reference reference;
  typedef typename ht::const_reference const_reference;

  typedef typename ht::const_iterator iterator;
  typedef typename ht::const_iterator const_iterator;

  // 返回hash相关函数
  hasher hash_funct() const { return rep.hash_funct(); }
  key_equal key_eq() const { return rep.key_eq(); }

public:
  hash_set() : rep(100, hasher(), key_equal()) {}
  explicit hash_set(size_type n) : rep(n, hasher(), key_equal()) {}
  hash_set(size_type n, const hasher& hf) : rep(n, hf, key_equal()) {}
  hash_set(size_type n, const hasher& hf, const key_equal& eql)
    : rep(n, hf, eql) {}

#ifdef __STL_MEMBER_TEMPLATES
  template <class InputIterator>
  hash_set(InputIterator f, InputIterator l)
    : rep(100, hasher(), key_equal()) { rep.insert_unique(f, l); }
  template <class InputIterator>
  hash_set(InputIterator f, InputIterator l, size_type n)
    : rep(n, hasher(), key_equal()) { rep.insert_unique(f, l); }
  template <class InputIterator>
  hash_set(InputIterator f, InputIterator l, size_type n,
           const hasher& hf)
    : rep(n, hf, key_equal()) { rep.insert_unique(f, l); }
  template <class InputIterator>
  hash_set(InputIterator f, InputIterator l, size_type n,
           const hasher& hf, const key_equal& eql)
    : rep(n, hf, eql) { rep.insert_unique(f, l); }
#else

  hash_set(const value_type* f, const value_type* l)
    : rep(100, hasher(), key_equal()) { rep.insert_unique(f, l); }
  hash_set(const value_type* f, const value_type* l, size_type n)
    : rep(n, hasher(), key_equal()) { rep.insert_unique(f, l); }
  hash_set(const value_type* f, const value_type* l, size_type n,
           const hasher& hf)
    : rep(n, hf, key_equal()) { rep.insert_unique(f, l); }
  hash_set(const value_type* f, const value_type* l, size_type n,
           const hasher& hf, const key_equal& eql)
    : rep(n, hf, eql) { rep.insert_unique(f, l); }

  hash_set(const_iterator f, const_iterator l)
    : rep(100, hasher(), key_equal()) { rep.insert_unique(f, l); }
  hash_set(const_iterator f, const_iterator l, size_type n)
    : rep(n, hasher(), key_equal()) { rep.insert_unique(f, l); }
  hash_set(const_iterator f, const_iterator l, size_type n,
           const hasher& hf)
    : rep(n, hf, key_equal()) { rep.insert_unique(f, l); }
  hash_set(const_iterator f, const_iterator l, size_type n,
           const hasher& hf, const key_equal& eql)
    : rep(n, hf, eql) { rep.insert_unique(f, l); }
#endif /*__STL_MEMBER_TEMPLATES */

public:
  // 下面都是对hashtable的简单封装, 见<stl_hashtable.h>
  size_type size() const { return rep.size(); }
  size_type max_size() const { return rep.max_size(); }
  bool empty() const { return rep.empty(); }
  void swap(hash_set& hs) { rep.swap(hs.rep); }

  friend bool operator== __STL_NULL_TMPL_ARGS (const hash_set&,
                                               const hash_set&);
  iterator begin() const { return rep.begin(); }
  iterator end() const { return rep.end(); }

public:
  pair<iterator, bool> insert(const value_type& obj)
    {
      pair<typename ht::iterator, bool> p = rep.insert_unique(obj);
      r
      eturn pair<iterator, bool>(p.first, p.second);
    }
#ifdef __STL_MEMBER_TEMPLATES
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l) { rep.insert_unique(f,l); }
#else
  void insert(const value_type* f, const value_type* l) {
    rep.insert_unique(f,l);
  }
  void insert(const_iterator f, const_iterator l) {rep.insert_unique(f, l); }
#endif /*__STL_MEMBER_TEMPLATES */

  // hash_set和set一样, 都不允许key重复
  pair<iterator, bool> insert_noresize(const value_type& obj)
  {
    pair<typename ht::iterator, bool> p = rep.insert_unique_noresize(obj);
    return pair<iterator, bool>(p.first, p.second);
  }

  iterator find(const key_type& key) const { return rep.find(key); }

  size_type count(const key_type& key) const { return rep.count(key); }

  pair<iterator, iterator> equal_range(const key_type& key) const
    { return rep.equal_range(key); }

  size_type erase(const key_type& key) {return rep.erase(key); }
  void erase(iterator it) { rep.erase(it); }
  void erase(iterator f, iterator l) { rep.erase(f, l); }
  void clear() { rep.clear(); }

public:
  void resize(size_type hint) { rep.resize(hint); }
  size_type bucket_count() const { return rep.bucket_count(); }
  size_type max_bucket_count() const { return rep.max_bucket_count(); }
  size_type elems_in_bucket(size_type n) const
    { return rep.elems_in_bucket(n); }
};

// Equality for hash_set: delegates to the comparison of the two wrapped
// hashtables (this function is declared a friend inside hash_set).
template <class Value, class HashFcn, class EqualKey, class Alloc>
inline bool
operator==(const hash_set<Value, HashFcn, EqualKey, Alloc>& hs1,
           const hash_set<Value, HashFcn, EqualKey, Alloc>& hs2)
{
  const bool same_table = (hs1.rep == hs2.rep);
  return same_table;
}

// When the compiler supports partial ordering of function templates,
// provide a global swap overload for hash_set that forwards to the
// container's own member swap, for efficiency.
#ifdef __STL_FUNCTION_TMPL_PARTIAL_ORDER

template <class Val, class HashFcn, class EqualKey, class Alloc>
inline void
swap(hash_set<Val, HashFcn, EqualKey, Alloc>& hs1,
     hash_set<Val, HashFcn, EqualKey, Alloc>& hs2)
{
  // Delegate to the member function, which swaps the wrapped hashtables.
  hs1.swap(hs2);
}

#endif /* __STL_FUNCTION_TMPL_PARTIAL_ORDER */

// hash_multiset和hash_set除去允许key重复外, 其余性质一致
#ifndef __STL_LIMITED_DEFAULT_TEMPLATES
template <class Value, class HashFcn = hash<Value>,
          class EqualKey = equal_to<Value>,
          class Alloc = alloc>
#else
template <class Value, class HashFcn, class EqualKey, class Alloc = alloc>
#endif
class hash_multiset
{
private:
  typedef hashtable<Value, Value, HashFcn, identity<Value>,
                    EqualKey, Alloc> ht;
  ht rep;

public:
  typedef typename ht::key_type key_type;
  typedef typename ht::value_type value_type;
  typedef typename ht::hasher hasher;
  typedef typename ht::key_equal key_equal;

  typedef typename ht::size_type size_type;
  typedef typename ht::difference_type difference_type;
  typedef typename ht::const_pointer pointer;
  typedef typename ht::const_pointer const_pointer;
  typedef typename ht::const_reference reference;
  typedef typename ht::const_reference const_reference;

  typedef typename ht::const_iterator iterator;
  typedef typename ht::const_iterator const_iterator;

  hasher hash_funct() const { return rep.hash_funct(); }
  key_equal key_eq() const { return rep.key_eq(); }

public:
  hash_multiset() : rep(100, hasher(), key_equal()) {}
  explicit hash_multiset(size_type n) : rep(n, hasher(), key_equal()) {}
  hash_multiset(size_type n, const hasher& hf) : rep(n, hf, key_equal()) {}
  hash_multiset(size_type n, const hasher& hf, const key_equal& eql)
    : rep(n, hf, eql) {}

#ifdef __STL_MEMBER_TEMPLATES
  template <class InputIterator>
  hash_multiset(InputIterator f, InputIterator l)
    : rep(100, hasher(), key_equal()) { rep.insert_equal(f, l); }
  template <class InputIterator>
  hash_multiset(InputIterator f, InputIterator l, size_type n)
    : rep(n, hasher(), key_equal()) { rep.insert_equal(f, l); }
  template <class InputIterator>
  hash_multiset(InputIterator f, InputIterator l, size_type n,
                const hasher& hf)
    : rep(n, hf, key_equal()) { rep.insert_equal(f, l); }
  template <class InputIterator>
  hash_multiset(InputIterator f, InputIterator l, size_type n,
                const hasher& hf, const key_equal& eql)
    : rep(n, hf, eql) { rep.insert_equal(f, l); }
#else

  hash_multiset(const value_type* f, const value_type* l)
    : rep(100, hasher(), key_equal()) { rep.insert_equal(f, l); }
  hash_multiset(const value_type* f, const value_type* l, size_type n)
    : rep(n, hasher(), key_equal()) { rep.insert_equal(f, l); }
  hash_multiset(const value_type* f, const value_type* l, size_type n,
                const hasher& hf)
    : rep(n, hf, key_equal()) { rep.insert_equal(f, l); }
  hash_multiset(const value_type* f, const value_type* l, size_type n,
                const hasher& hf, const key_equal& eql)
    : rep(n, hf, eql) { rep.insert_equal(f, l); }

  hash_multiset(const_iterator f, const_iterator l)
    : rep(100, hasher(), key_equal()) { rep.insert_equal(f, l); }
  hash_multiset(const_iterator f, const_iterator l, size_type n)
    : rep(n, hasher(), key_equal()) { rep.insert_equal(f, l); }
  hash_multiset(const_iterator f, const_iterator l, size_type n,
                const hasher& hf)
    : rep(n, hf, key_equal()) { rep.insert_equal(f, l); }
  hash_multiset(const_iterator f, const_iterator l, size_type n,
                const hasher& hf, const key_equal& eql)
    : rep(n, hf, eql) { rep.insert_equal(f, l); }
#endif /*__STL_MEMBER_TEMPLATES */

public:
  size_type size() const { return rep.size(); }
  size_type max_size() const { return rep.max_size(); }
  bool empty() const { return rep.empty(); }
  void swap(hash_multiset& hs) { rep.swap(hs.rep); }
  friend bool operator== __STL_NULL_TMPL_ARGS (const hash_multiset&,
                                               const hash_multiset&);

  iterator begin() const { return rep.begin(); }
  iterator end() const { return rep.end(); }

public:
  iterator insert(const value_type& obj) { return rep.insert_equal(obj); }
#ifdef __STL_MEMBER_TEMPLATES
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l) { rep.insert_equal(f,l); }
#else
  void insert(const value_type* f, const value_type* l) {
    rep.insert_equal(f,l);
  }
  void insert(const_iterator f, const_iterator l) { rep.insert_equal(f, l); }
#endif /*__STL_MEMBER_TEMPLATES */
  iterator insert_noresize(const value_type& obj)
    { return rep.insert_equal_noresize(obj); }

  iterator find(const key_type& key) const { return rep.find(key); }

  size_type count(const key_type& key) const { return rep.count(key); }

  pair<iterator, iterator> equal_range(const key_type& key) const
    { return rep.equal_range(key); }

  size_type erase(const key_type& key) {return rep.erase(key); }
  void erase(iterator it) { rep.erase(it); }
  void erase(iterator f, iterator l) { rep.erase(f, l); }
  void clear() { rep.clear(); }

public:
  void resize(size_type hint) { rep.resize(hint); }
  size_type bucket_count() const { return rep.bucket_count(); }
  size_type max_bucket_count() const { return rep.max_bucket_count(); }
  size_type elems_in_bucket(size_type n) const
    { return rep.elems_in_bucket(n); }
};

// Equality for hash_multiset: delegates to the comparison of the two
// wrapped hashtables (this function is declared a friend inside
// hash_multiset).
template <class Val, class HashFcn, class EqualKey, class Alloc>
inline bool
operator==(const hash_multiset<Val, HashFcn, EqualKey, Alloc>& hs1,
           const hash_multiset<Val, HashFcn, EqualKey, Alloc>& hs2)
{
  const bool same_table = (hs1.rep == hs2.rep);
  return same_table;
}

// When the compiler supports partial ordering of function templates,
// provide a global swap overload for hash_multiset that forwards to the
// container's own member swap, for efficiency.
#ifdef __STL_FUNCTION_TMPL_PARTIAL_ORDER

template <class Val, class HashFcn, class EqualKey, class Alloc>
inline void
swap(hash_multiset<Val, HashFcn, EqualKey, Alloc>& hs1,
     hash_multiset<Val, HashFcn, EqualKey, Alloc>& hs2)
{
  // Delegate to the member function, which swaps the wrapped hashtables.
  hs1.swap(hs2);
}

#endif /* __STL_FUNCTION_TMPL_PARTIAL_ORDER */

#if defined(__sgi) && !defined(__GNUC__) && (_MIPS_SIM != _MIPS_SIM_ABI32)
#pragma reset woff 1174
#endif

__STL_END_NAMESPACE

#endif /* __SGI_STL_INTERNAL_HASH_SET_H */

// Local Variables:
// mode:C++
// End:

示例1:

#include<iostream>
#include<string>
#include<iterator>
#include<algorithm>
#include<hash_set>
#include<windows.h>

using namespace std;

// Example 1: build two hash_sets from int arrays and print their contents.
// Declared as int main() because standard C++ requires main to return int.
int main(){

	const int N= 3;
	int s1[N]={1,2,3};
	int s2[N]={4,5,6};
	hash_set<int>sa(s1,s1+N);// set sa, filled with the elements of array s1
	hash_set<int>sb(s2,s2+N);// set sb, filled with the elements of array s2
	hash_set<int> sc;// set sc, left empty

	// Writes each element to cout followed by a single space.
	ostream_iterator<int> output(cout," ");
	cout<<"输出集合sa的元素:";
	copy(sa.begin(),sa.end(),output);

	cout<<"\n\n输出集合sb的元素:";
	copy(sb.begin(),sb.end(),output);
	cout<<endl;

	// Runs the shell's "pause" command so the console window stays open.
	system("pause");

	return 0;
}


示例2:

/*
 *
 ************************************************************************************
 *      hash_set哈希集合容器的基础说明:
 ************************************************************************************
 *
 * hash_set哈希集合容器:使用hashtable数据结构的具有高效数据检索的关联容器
 * 
 * 不提供反向迭代器,只有前向迭代器iterator和const_iterator
 * 不允许插入重复的元素键值
 * Hashed Associative Container  Simple Associative Container   Unique Associative Container
 *
 * 目前还不是C++的标准容器,只是SGI C++ STL的一个扩展容器
 * 使用hash_set必须使用预处理指令#include <hash_set>
 * 
 **************************************************************************************
 *
 * 创建hash_set对象:
 * 1.hash_set<int> hs;        //键值比较使用默认的函数对象equal_to<Value>
 * 2.hash_set(size_type n);      //在质数列表中找出第一个大于等于n的质数作为表长:hash_set<int> hs(100);
 *  3.hash_set(size_type n,const hasher& h);  //hash函数对象为h
 * 4.hash_set(size_type n,const hasher& h,const key_equal& k);//键值比较函数对象k         
 * 5.hash_set(const hash_set& h);     //用一个hash集合容器拷贝生成另一个hash集合容器:hash_set<int> hs2(hs); 
 *
 **************************************************************************************
 *
 * 元素的插入
 * //typedef Value value_type;  (hash_set的元素本身即键值,不是pair)
 * pair<iterator,bool> insert(const value_type& v);//second:返回true/false插入成功标志   
 * void insert(iterator pos,const value_type& v);
 *
 **************************************************************************************
 *
 * 元素的删除
 * void erase(iterator pos);
 * size_type erase(const key_type& k);     //删除等于键值k的元素
 * void erase(first,last);        //删除[first,last)区间的元素
 * void clear();
 *
 **************************************************************************************
 *
 * 访问与搜索
 *
 * iterator begin();iterator end();     //不会将元素排序遍历出来
 *
 * iterator find(const key_type& k) const;    //对于非默认类型如char*,在搜素时应定义相关的函数对象
 *
 * 其它常用函数
 * bool empty() const;
 * size_type size() const;
 * size_type bucket_count() const; //获得hash表的表长(桶的个数)
 * void swap();
 * resize();
 * pair<iterator,iterator> equal_range(const key_type& k) const;//确定等于键值k的元素区间(哈希容器无序,SGI的hash_set不提供lower_bound/upper_bound,即上界、下界)
 *
 * 在SGI STL中,提供了以下hash函数: 
 * struct hash<char*> 
 * struct hash<const char*> 
 * struct hash<char> 
 * struct hash<unsigned char> 
 * struct hash<signed char> 
 * struct hash<short> 
 * struct hash<unsigned short> 
 * struct hash<int> 
 * struct hash<unsigned int> 
 * struct hash<long> 
 * struct hash<unsigned long> 
 *
 * hash函数决定了如何划分散列表
 *
 *
 *
 ********************************************
 **   cumirror ** [email protected] **    **
 ********************************************
 *
 */

#include <cstring>
#include <hash_set>
#include <iostream>


// Plain record describing one student. The text fields are non-owning
// pointers to NUL-terminated strings; they are pointers-to-const so the
// struct can be initialised directly from string literals.
struct student{
 const char* name;
 int age;
 const char* city;
 const char* phone;
};
// Equality functor for student.
// Two students compare equal when their city strings have the same content,
// i.e. city (not name) acts as the key; swap in name to test the other case.
class stuequal{
public:
 // const-qualified so the functor can be invoked through a const object,
 // matching stu_hash and strequal.
 bool operator() (const student& a,const student& b) const{
  return strcmp(a.city,b.city)==0;
 }
};
// Hash functor for student.
// Hashes the same field that stuequal compares (city), so students that
// compare equal also hash equal.
//typedef unsigned int size_t;
struct stu_hash{
 // Folds the characters of stu.city into res = 5*res + ch, starting from 0.
 // An empty city string therefore hashes to 0. stu.city must not be null.
 size_t operator()(const student& stu) const
 {
  unsigned long res = 0;
  for(const char* s=stu.city; *s; ++s){
   res=5*res+*s;
  }
  return size_t(res);
 }
};

// Equality functor for C strings: compares the characters the two pointers
// refer to (via strcmp) rather than the pointer values themselves.
class strequal{
public:
 bool operator () (const char* a,const char* b)const{
  const int order=strcmp(a,b);
  return order==0;
 }
};

// Example 2: hash_set with C-string keys and with a user-defined struct.
int main(){
 using namespace std;

 // C-string set: strequal compares by content instead of by pointer value.
 hash_set<const char*,hash<const char*>,strequal> a;
 a.insert("tongjin");
 a.insert("cumirror");
 a.insert("makelaugh");
 a.insert("feiguodeyun");

// hash<const char*> is the hash function object provided by default.
// Print the key that was searched for rather than *b: dereferencing b is
// only valid when find() did not return a.end().
 const char* const key="tongjin";
 hash_set<const char*,hash<const char*>,strequal>::const_iterator b=a.find(key);
 cout<<key<<" is "<<(b!=a.end()?"present":"not present")<<endl;

// For user-defined types, hash containers need both a hash function object
// and an equality function object; note the different role each one plays.
 student s[]={
  {"童进",23,"长沙","XXX"},
  {"老大",23,"武汉","XXX"},
  {"饺子",23,"福州","XXX"},
  {"王老虎",23,"地球","XXX"},
  {"周润发",23,"香港","XXX"},
  {"周星星",23,"香港","XXX"},   // duplicate city
  {"童进",23,"香港","XXX"}   // duplicate name, and duplicate city as well
 };         

 hash_set<student,stu_hash,stuequal> c;
 c.insert(s[0]);
 c.insert(s[1]);
 c.insert(s[2]);
 c.insert(s[3]);
 c.insert(s[4]);
 c.insert(s[5]);
 c.insert(s[6]);
// Note: a hash container does not keep its elements sorted.
 for(hash_set<student,stu_hash,stuequal>::iterator i=c.begin();i!=c.end();++i){
  cout<<i->name<<" "<<i->age<<" "<<i->city<<endl;
 }
 return 0;
}




你可能感兴趣的:(STL源码剖析 [容器](二十)[stl_hash_set.h])