[C++]HashMap实现STL map

用HashMap实现STL map

在STL中map是用RB Tree(也就是红黑树)来完成的。本文尝试用HashMap来实现map。

问题产生

本文主要是完成以下interface:

#ifndef HASHMAP_HPP
#define HASHMAP_HPP
#include <vector>
#include <list>
#include <utility>
#include <iostream>
#define kHASHSIZE 1024
// Forward declaration so that Node can name HashMap as a friend.
template <typename KEY, typename VALUE>
class HashMap;

/**
 * One node of a singly linked bucket chain, holding a key-value pair.
 *
 * Only HashMap (a friend) can create, link or destroy nodes. Destroying a
 * node also destroys every node reachable through `next`, so a node must be
 * unlinked (next reset to nullptr) before it is deleted on its own.
 */
template <typename KEY, typename VALUE>
class Node {
public:
  /** @return a copy of the stored key */
  KEY getKey() const;
  /** @return a copy of the stored value */
  VALUE getValue() const;

private:
  friend class HashMap<KEY, VALUE>;
  // Initializers are listed in declaration order (key, value, next), which is
  // the order in which the members are actually initialized.
  Node() : key(), value(), next(nullptr) {}
  Node(const KEY &k, const VALUE &v) : key(k), value(v), next(nullptr) {}
  // A copied node would share `next` with the original and the cascading
  // destructor would then free the same chain twice, so copying is disabled.
  Node(const Node &) = delete;
  Node &operator=(const Node &) = delete;
  // cascade deletion: deleting a null pointer is a no-op, so no check needed
  ~Node() { delete next; }
  KEY key;
  VALUE value;
  Node *next;
};
/**
 * A map from KEY to VALUE built on a table of kHASHSIZE bucket chains.
 *
 * m_heads stores one chain-head pointer per bucket. Lookups start at the node
 * after the head, so the head node itself never holds a user entry. All member
 * functions except find() are defined out of line.
 */
template <typename KEY, typename VALUE>
class HashMap {
public:
  /** Default constructor. */
  HashMap();

  /**
   * Constructs a map from a vector of pairs.
   * @param t_pairs the key-value pairs to store
   */
  HashMap(const std::vector<std::pair<KEY, VALUE> > &t_pairs);

  /**
   * Constructs a map from a list of pairs.
   * @param t_pairs the key-value pairs to store
   */
  HashMap(const std::list<std::pair<KEY, VALUE> > &t_pairs);

  /**
   * Copy constructor.
   * @param t_another the map to copy from
   */
  HashMap(const HashMap &t_another);

  /** Destructor. */
  ~HashMap();

  /**
   * Copy assignment.
   * @param t_another the map to copy from
   * @return a reference to this map
   */
  HashMap &operator=(const HashMap &t_another);

  /** @return the number of pairs currently stored */
  int size(void) const;

  /** @return true when the map holds no pairs */
  bool empty(void) const;

  /**
   * Stores a key-value pair.
   * @param t_key the key
   * @param t_value the value
   * @note inserting a key that is already present replaces its value
   */
  void insert(const KEY &t_key, const VALUE &t_value);

  /**
   * Checks whether a key is present.
   * @return true if the map contains t_key
   */
  bool has(const KEY &t_key) const;

  /**
   * Removes the pair stored under a key.
   * @return true if a pair was removed
   */
  bool erase(const KEY &t_key);

  /** Removes all pairs from the map. */
  void clear();

  /** @return a vector with every key */
  std::vector<KEY> keys(void) const;

  /** @return a vector with every value */
  std::vector<VALUE> values(void) const;

  /** @return a vector with every key-value pair */
  std::vector<std::pair<KEY, VALUE> > items(void) const;

  /**
   * Accesses a value by key.
   * @param t_key the key
   * @return a reference to the value stored under t_key
   * @note if the key is absent, a new pair is created implicitly for it
   */
  VALUE &operator[](const KEY &t_key);

private:
  /** Hash function: maps a key to an unsigned hash value. */
  unsigned int HashValue(const KEY &t_key) const;

  /**
   * Looks up the node that stores a key.
   * @param key the key to search for
   * @return the matching node, or a null pointer when the key is absent
   */
  inline Node<KEY, VALUE> *find(const KEY &key) {
    const unsigned long long hashed = HashValue(key);
    const int bucket = hashed % kHASHSIZE;
    // Start after the bucket's head node; only the nodes behind it hold data.
    Node<KEY, VALUE> *cursor = m_heads[bucket]->next;
    while (cursor != nullptr) {
      if (cursor->key == key) return cursor;
      cursor = cursor->next;
    }
    return nullptr;
  }

  std::vector<Node<KEY, VALUE> *> m_heads;  // one chain head per bucket
  int m_size;                               // number of stored pairs
};
#include "HashMap.cc"
#endif  // HASHMAP_HPP

并通过以下测试代码:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <list>
#include <string>
#include <vector>
#include "HashMap.hpp"
/**
 * Function object that writes one key-value pair to std::cout as a
 * JSON-like object followed by a newline.
 */
template <typename KEY, typename VALUE>
struct Functor {
  void operator()(const std::pair<KEY, VALUE>& item) {
    std::cout << "{\"key\":\"" << item.first << "\","
              << "\"value\":\"" << item.second << "\"}"
              << std::endl;
  }
};
template <typename KEY, typename VALUE>
void print_items(const HashMap<KEY, VALUE>& t_my_map) {
  std::vector<std::pair<KEY, VALUE> > items = t_my_map.items();
  Functor<KEY, VALUE> functor;
  // promise the order does not matter
  std::sort(items.begin(), items.end());
  std::cout << "empty:" << (t_my_map.empty() ? "true" : "false") << " ";
  std::cout << "size:" << t_my_map.size() << std::endl;
  std::for_each(items.begin(), items.end(), functor);
}
/**
 * Test driver: reads "<string> <int>" pairs from standard input until the
 * stream fails, then exercises the HashMap constructors, copy assignment,
 * insert/erase/has, operator[], keys/values and clear, printing the results.
 *
 * Index loops use std::size_t so that the counter has the same signedness as
 * the container sizes it is compared against.
 */
int main() {
  std::vector<std::pair<std::string, int> > data;
  std::string temp_str;
  int temp_int;
  while (std::cin >> temp_str >> temp_int) {
    data.push_back(std::pair<std::string, int>(temp_str, temp_int));
  }

  // testing constructors
  std::cout << "testing constructors" << std::endl;
  {
    // default constructor
    HashMap<std::string, int> map0;
    print_items(map0);
    // construct from vector
    std::cout << "constructing from vector: " << std::endl;
    HashMap<std::string, int> map1(data);
    print_items(map1);
    // construct from list
    std::list<std::pair<std::string, int> > list_data(data.begin(), data.end());
    HashMap<std::string, int> map2(list_data);
    std::cout << "constructing from list: " << std::endl;
    print_items(map2);
    // copy constructor
    HashMap<std::string, int> map_copy(map2);
    std::cout << "copy constructing: " << std::endl;
    print_items(map_copy);
  }
  std::cout << std::endl;

  // testing copy assignment operator
  std::cout << "testing copy assignment operator" << std::endl;
  {
    // testing normal assignment
    std::cout << "testing normal assignment:" << std::endl;
    HashMap<std::string, int> map1(data);
    HashMap<std::string, int> map2(data);
    print_items(map2);
    // testing assign itself (deliberate self-assignment)
    map2 = map2;
    print_items(map2);
    // testing cascade assignment
    std::cout << "testing cascade assigment" << std::endl;
    map1 = map2 = map1.operator=(map2);
    print_items(map2);
  }
  std::cout << std::endl;

  // testing modify functions
  std::cout << "testing modify functions" << std::endl;
  {
    // testing insert: every key is already present, so values are replaced
    std::cout << "testing insert" << std::endl;
    HashMap<std::string, int> map1(data);
    for (const auto& entry : data) {
      map1.insert(entry.first, entry.second);
    }
    print_items(map1);

    // testing erase: remove the first half of the input keys
    std::cout << "testing erase" << std::endl;
    const std::size_t half = data.size() / 2;
    for (std::size_t i = 0; i < half; i++) {
      std::cout << "erasing " << data[i].first << ": ";
      std::cout << (map1.erase(data[i].first) ? "true" : "false");
      std::cout << std::endl;
    }

    print_items(map1);

    // test has
    std::cout << "testing has:" << std::endl;
    for (const auto& entry : data) {
      std::cout << "has entry " << entry.first << "? ";
      std::cout << (map1.has(entry.first) ? "true" : "false");
      std::cout << std::endl;
    }
  }
  std::cout << std::endl;

  // testing operator []
  std::cout << "testing operator []" << std::endl;
  {
    HashMap<std::string, int> map1;
    for (const auto& entry : data) {
      map1[entry.first] = entry.second;
    }
    for (const auto& entry : data) {
      map1[entry.first] = 0;
    }
    print_items(map1);
  }
  std::cout << std::endl;

  // testing keys and values
  std::cout << "testing keys and values" << std::endl;
  {
    HashMap<std::string, int> map1(data);
    std::vector<std::string> keys = map1.keys();
    std::vector<int> values = map1.values();
    std::sort(keys.begin(), keys.end());
    std::sort(values.begin(), values.end());
    for (std::size_t i = 0; i < keys.size(); i++) {
      std::cout << "key" << i << " " << keys[i] << std::endl;
    }
    for (std::size_t i = 0; i < values.size(); i++) {
      std::cout << "value" << i << " " << values[i] << std::endl;
    }
  }
  std::cout << std::endl;

  // testing clear
  std::cout << "testing clear" << std::endl;
  {
    HashMap<std::string, int> map1(data);
    map1.clear();
    print_items(map1);
  }
  std::cout << std::endl;
  return 0;
}

背景知识

哈希算法将任意长度的二进制值映射为固定长度的较小二进制值,这个小的二进制值称为哈希值。哈希值是一段数据近乎唯一且极其紧凑的数值表示形式。对于密码学哈希函数,如果散列一段明文而且哪怕只更改该段落的一个字母,随后的哈希都将产生不同的值;要找到散列为同一个值的两个不同的输入,在计算上来说基本上是不可能的。但本文用到的简单字符串哈希并不具备这种性质:哈希值对桶数取模之后,不同的key完全可能落入同一个桶(即哈希冲突),因此每个桶需要用链表来存放多个结点。

我的理解是哈希值就是一段数据的特征值,用来特化一段数据。

一个key(string)通过hash function可以对应到一个index,然后把相应的value放入这个index里面。

[C++]HashMap实现STL map_第1张图片

所以问题的关键在于:如何计算哈希值?

哈希值计算

以下给出几种计算哈希值的算法,以及它们的效率。

// SDBM Hash Function
// Hashes a NUL-terminated string; the result is masked to 31 bits.
// Takes a pointer to const so that string literals and std::string::c_str()
// can be hashed without a cast; the string is only read, never modified.
unsigned int SDBMHash(const char *str) {
  unsigned int hash = 0;

  for (; *str != '\0'; ++str) {
    // equivalent to: hash = 65599 * hash + *str
    hash = (*str) + (hash << 6) + (hash << 16) - hash;
  }

  return hash & 0x7FFFFFFF;
}

// RS Hash Function
// Hashes a NUL-terminated string with a multiplier that itself changes after
// every character; the result is masked to 31 bits.
// Takes a pointer to const so that string literals and std::string::c_str()
// can be hashed without a cast; the string is only read, never modified.
unsigned int RSHash(const char *str) {
  const unsigned int b = 378551;
  unsigned int a = 63689;
  unsigned int hash = 0;

  for (; *str != '\0'; ++str) {
    hash = hash * a + (*str);
    a *= b;  // unsigned arithmetic wraps modulo 2^N by definition
  }

  return hash & 0x7FFFFFFF;
}

// JS Hash Function
// Hashes a NUL-terminated string starting from a fixed seed; the result is
// masked to 31 bits.
// Takes a pointer to const so that string literals and std::string::c_str()
// can be hashed without a cast; the string is only read, never modified.
unsigned int JSHash(const char *str) {
  unsigned int hash = 1315423911;

  for (; *str != '\0'; ++str) {
    hash ^= ((hash << 5) + (*str) + (hash >> 2));
  }

  return hash & 0x7FFFFFFF;
}

// P. J. Weinberger Hash Function
// Hashes a NUL-terminated string; whenever the top eighth of the bits becomes
// non-zero it is folded back into the lower bits and cleared. The result is
// masked to 31 bits.
// Takes a pointer to const so that string literals and std::string::c_str()
// can be hashed without a cast; the string is only read, never modified.
unsigned int PJWHash(const char *str) {
  const unsigned int BitsInUnsignedInt =
      static_cast<unsigned int>(sizeof(unsigned int) * 8);
  const unsigned int ThreeQuarters = (BitsInUnsignedInt * 3) / 4;
  const unsigned int OneEighth = BitsInUnsignedInt / 8;
  const unsigned int HighBits = 0xFFFFFFFFu << (BitsInUnsignedInt - OneEighth);
  unsigned int hash = 0;

  for (; *str != '\0'; ++str) {
    hash = (hash << OneEighth) + (*str);
    const unsigned int test = hash & HighBits;
    if (test != 0) {
      hash = (hash ^ (test >> ThreeQuarters)) & ~HighBits;
    }
  }

  return hash & 0x7FFFFFFF;
}

// ELF Hash Function
// Hashes a NUL-terminated string; whenever the top four bits become non-zero
// they are folded back into the lower bits and cleared. The result is masked
// to 31 bits.
// Takes a pointer to const so that string literals and std::string::c_str()
// can be hashed without a cast; the string is only read, never modified.
unsigned int ELFHash(const char *str) {
  unsigned int hash = 0;

  for (; *str != '\0'; ++str) {
    hash = (hash << 4) + (*str);
    const unsigned int x = hash & 0xF0000000u;
    if (x != 0) {
      hash ^= (x >> 24);
      hash &= ~x;
    }
  }

  return hash & 0x7FFFFFFF;
}

// BKDR Hash Function
// Hashes a NUL-terminated string as a polynomial in `seed`; the result is
// masked to 31 bits.
// Takes a pointer to const so that string literals and std::string::c_str()
// can be hashed without a cast; the string is only read, never modified.
unsigned int BKDRHash(const char *str) {
  const unsigned int seed = 131;  // 31 131 1313 13131 131313 etc..
  unsigned int hash = 0;

  for (; *str != '\0'; ++str) {
    hash = hash * seed + (*str);
  }

  return hash & 0x7FFFFFFF;
}

// DJB Hash Function
// Hashes a NUL-terminated string starting from 5381; each step computes
// hash * 33 + character. The result is masked to 31 bits.
// Takes a pointer to const so that string literals and std::string::c_str()
// can be hashed without a cast; the string is only read, never modified.
unsigned int DJBHash(const char *str) {
  unsigned int hash = 5381;

  for (; *str != '\0'; ++str) {
    hash += (hash << 5) + (*str);
  }

  return hash & 0x7FFFFFFF;
}

// AP Hash Function
// Hashes a NUL-terminated string, alternating between two mixing steps for
// characters at even and odd positions. The result is masked to 31 bits.
// Takes a pointer to const so that string literals and std::string::c_str()
// can be hashed without a cast; the string is only read, never modified.
unsigned int APHash(const char *str) {
  unsigned int hash = 0;

  for (int i = 0; *str != '\0'; ++i, ++str) {
    if ((i & 1) == 0) {
      hash ^= ((hash << 7) ^ (*str) ^ (hash >> 3));
    } else {
      hash ^= (~((hash << 11) ^ (*str) ^ (hash >> 5)));
    }
  }

  return hash & 0x7FFFFFFF;
}

问题解决

我采用的是第一种哈希值的算法(即 SDBMHash),并把它改写成按下标遍历 key 的成员函数 HashValue。

//
// Hashmap.cpp
// Hash
//
// Created by 颜泽鑫 on 6/4/16.
// Copyright © 2016 颜泽鑫. All rights reserved.
//

#include "HashMap.hpp"
/** Returns a copy of the key stored in this node. */
template <typename KEY, typename VALUE>
KEY Node<KEY, VALUE>::getKey() const {
  return this->key;
}

/** Returns a copy of the value stored in this node. */
template <typename KEY, typename VALUE>
VALUE Node<KEY, VALUE>::getValue() const {
  return this->value;
}

/**
 * Default constructor: creates an empty map with kHASHSIZE buckets.
 *
 * Each bucket gets a head node that never holds a user entry. The head is
 * default-constructed, so KEY and VALUE only need default constructors; they
 * do not have to be constructible from "" and 0.
 */
template <typename KEY, typename VALUE>
HashMap<KEY, VALUE>::HashMap() : m_heads(), m_size(0) {
  m_heads.reserve(kHASHSIZE);
  for (int i = 0; i != kHASHSIZE; i++) {
    m_heads.push_back(new Node<KEY, VALUE>());
  }
}

/**
 * Constructs a map from a vector of key-value pairs.
 *
 * Each bucket gets a default-constructed head node (so KEY and VALUE need not
 * be constructible from "" and 0); the pairs are then stored in order through
 * insert().
 *
 * @param t_pairs the key-value pairs to store
 */
template <typename KEY, typename VALUE>
HashMap<KEY, VALUE>::HashMap(
    const std::vector<std::pair<KEY, VALUE> > &t_pairs)
    : m_heads(), m_size(0) {
  m_heads.reserve(kHASHSIZE);
  for (int i = 0; i != kHASHSIZE; i++) {
    m_heads.push_back(new Node<KEY, VALUE>());
  }
  // Iterate by const reference to avoid copying every pair.
  for (const auto &entry : t_pairs) {
    insert(entry.first, entry.second);
  }
}

/**
 * Constructs a map from a list of key-value pairs.
 *
 * Each bucket gets a default-constructed head node (so KEY and VALUE need not
 * be constructible from "" and 0); the pairs are then stored in order through
 * insert().
 *
 * @param t_pairs the key-value pairs to store
 */
template <typename KEY, typename VALUE>
HashMap<KEY, VALUE>::HashMap(const std::list<std::pair<KEY, VALUE> > &t_pairs)
    : m_heads(), m_size(0) {
  m_heads.reserve(kHASHSIZE);
  for (int i = 0; i != kHASHSIZE; i++) {
    m_heads.push_back(new Node<KEY, VALUE>());
  }
  // Iterate by const reference to avoid copying every pair.
  for (const auto &entry : t_pairs) {
    insert(entry.first, entry.second);
  }
}

/**
 * Copy constructor: builds a fresh bucket table and re-inserts every pair
 * returned by t_another.items().
 *
 * Head nodes are default-constructed, so KEY and VALUE need not be
 * constructible from "" and 0.
 *
 * @param t_another the map to copy from
 */
template <typename KEY, typename VALUE>
HashMap<KEY, VALUE>::HashMap(const HashMap &t_another)
    : m_heads(), m_size(0) {
  m_heads.reserve(kHASHSIZE);
  for (int i = 0; i != kHASHSIZE; i++) {
    m_heads.push_back(new Node<KEY, VALUE>());
  }
  const std::vector<std::pair<KEY, VALUE> > source_items = t_another.items();
  for (const auto &entry : source_items) {
    insert(entry.first, entry.second);
  }
}

/**
 * Destructor: deletes the head node of every bucket. Node's destructor deletes
 * the node that follows it, so each delete releases a whole chain.
 */
template <typename KEY, typename VALUE>
HashMap<KEY, VALUE>::~HashMap() {
  int bucket = 0;
  while (bucket != kHASHSIZE) {
    delete m_heads[bucket];
    ++bucket;
  }
}

/**
 * Copy assignment: empties this map and re-inserts every pair of t_another.
 * Assigning a map to itself leaves it untouched.
 *
 * @param t_another the map to copy from
 * @return a reference to this map
 */
template <typename KEY, typename VALUE>
HashMap<KEY, VALUE> &HashMap<KEY, VALUE>::operator=(
    const HashMap<KEY, VALUE> &t_another) {
  if (this != &t_another) {
    clear();
    m_size = 0;
    const std::vector<std::pair<KEY, VALUE> > snapshot = t_another.items();
    typedef typename std::vector<std::pair<KEY, VALUE> >::const_iterator Iter;
    for (Iter it = snapshot.begin(); it != snapshot.end(); ++it) {
      insert(it->first, it->second);
    }
  }
  return *this;
}

/** Returns the number of key-value pairs currently stored. */
template <typename KEY, typename VALUE>
int HashMap<KEY, VALUE>::size() const {
  return this->m_size;
}

/** Returns true when the map holds no key-value pairs. */
template <typename KEY, typename VALUE>
bool HashMap<KEY, VALUE>::empty() const {
  return 0 == m_size;
}

/**
 * Stores a key-value pair.
 *
 * If the key is already present its value is overwritten; otherwise a new
 * node is appended to the end of the key's bucket chain and the size grows
 * by one.
 *
 * @param t_key the key
 * @param t_value the value to store under t_key
 */
template <typename KEY, typename VALUE>
void HashMap<KEY, VALUE>::insert(const KEY &t_key, const VALUE &t_value) {
  Node<KEY, VALUE> *existing = find(t_key);
  if (existing != NULL) {
    existing->value = t_value;
    return;
  }

  // Walk from the bucket's head node to the last node of the chain.
  Node<KEY, VALUE> *tail = m_heads[HashValue(t_key) % kHASHSIZE];
  for (; tail->next != nullptr; tail = tail->next) {
  }
  tail->next = new Node<KEY, VALUE>(t_key, t_value);
  ++m_size;
}

/**
 * Checks whether a key is present.
 *
 * @param t_key the key to look for
 * @return true if a node after the bucket's head node holds t_key
 */
template <typename KEY, typename VALUE>
bool HashMap<KEY, VALUE>::has(const KEY &t_key) const {
  const unsigned long long hashed = HashValue(t_key);
  const int bucket = hashed % kHASHSIZE;
  const Node<KEY, VALUE> *cursor = m_heads[bucket]->next;
  while (cursor != nullptr) {
    if (cursor->key == t_key) {
      return true;
    }
    cursor = cursor->next;
  }
  return false;
}

/**
 * Removes the pair stored under a key.
 *
 * The chain is walked with a pointer to the node before the candidate, so
 * the matching node can be unlinked without touching any other entry.
 *
 * @param t_key the key to remove
 * @return true if a pair was removed, false if the key was not present
 */
template <typename KEY, typename VALUE>
bool HashMap<KEY, VALUE>::erase(const KEY &t_key) {
  // Start at the bucket's head node, which never holds a user entry.
  Node<KEY, VALUE> *prev = m_heads[HashValue(t_key) % kHASHSIZE];
  while (prev->next != nullptr && !(prev->next->key == t_key)) {
    prev = prev->next;
  }

  Node<KEY, VALUE> *victim = prev->next;
  if (victim == nullptr) {
    return false;
  }

  prev->next = victim->next;
  // Node's destructor deletes whatever `next` points to; detach the victim
  // first so that only this single node is destroyed.
  victim->next = nullptr;
  delete victim;
  m_size--;
  return true;
}

/**
 * Removes every pair from the map while keeping the bucket head nodes.
 *
 * Deleting the first node after a head releases the whole chain, because
 * Node's destructor deletes the node that follows it.
 */
template <typename KEY, typename VALUE>
void HashMap<KEY, VALUE>::clear() {
  for (int bucket = 0; bucket != kHASHSIZE; ++bucket) {
    Node<KEY, VALUE> *head = m_heads[bucket];
    Node<KEY, VALUE> *chain = head->next;
    if (chain == nullptr) {
      continue;
    }
    head->next = nullptr;
    delete chain;
  }
  m_size = 0;
}

/**
 * Collects every key in the map.
 *
 * @return the keys, bucket by bucket and in chain order within a bucket
 */
template <typename KEY, typename VALUE>
std::vector<KEY> HashMap<KEY, VALUE>::keys(void) const {
  std::vector<KEY> collected;
  for (int bucket = 0; bucket != kHASHSIZE; ++bucket) {
    // Skip the head node; only the nodes after it hold entries.
    const Node<KEY, VALUE> *node = m_heads[bucket]->next;
    for (; node != nullptr; node = node->next) {
      collected.push_back(node->key);
    }
  }
  return collected;
}

/**
 * Collects every value in the map.
 *
 * @return the values, bucket by bucket and in chain order within a bucket
 */
template <typename KEY, typename VALUE>
std::vector<VALUE> HashMap<KEY, VALUE>::values(void) const {
  std::vector<VALUE> collected;
  for (int bucket = 0; bucket != kHASHSIZE; ++bucket) {
    // Skip the head node; only the nodes after it hold entries.
    const Node<KEY, VALUE> *node = m_heads[bucket]->next;
    for (; node != nullptr; node = node->next) {
      collected.push_back(node->value);
    }
  }
  return collected;
}

/**
 * Collects every key-value pair in the map.
 *
 * The pair is constructed with an explicitly qualified std::pair, so the
 * function compiles for any KEY/VALUE instead of depending on
 * argument-dependent lookup to find make_pair (which fails for built-in
 * types such as int).
 *
 * @return the pairs, bucket by bucket and in chain order within a bucket
 */
template <typename KEY, typename VALUE>
std::vector<std::pair<KEY, VALUE> > HashMap<KEY, VALUE>::items(void) const {
  std::vector<std::pair<KEY, VALUE> > ans;
  for (int i = 0; i != kHASHSIZE; i++) {
    // Skip the head node; only the nodes after it hold entries.
    const Node<KEY, VALUE> *node = m_heads[i]->next;
    for (; node != nullptr; node = node->next) {
      ans.push_back(std::pair<KEY, VALUE>(node->key, node->value));
    }
  }
  return ans;
}

/**
 * Accesses the value stored under a key, creating the entry if needed.
 *
 * A missing key is inserted with a value-initialized VALUE() (0 for
 * arithmetic types, an empty string for std::string, ...), so VALUE does not
 * have to be constructible from the integer literal 0.
 *
 * @param t_key the key
 * @return a reference to the value stored under t_key
 */
template <typename KEY, typename VALUE>
VALUE &HashMap<KEY, VALUE>::operator[](const KEY &t_key) {
  Node<KEY, VALUE> *node = find(t_key);
  if (node == nullptr) {
    insert(t_key, VALUE());
    node = find(t_key);
  }
  return node->value;
}

/**
 * Hash function (SDBM variant): folds the elements of t_key, read by index
 * until a '\0' element is reached, into an unsigned value masked to 31 bits.
 *
 * @param t_key the key to hash; it must support t_key[i] and yield '\0' at
 *              its end
 * @return the hash value
 */
template <typename KEY, typename VALUE>
unsigned int HashMap<KEY, VALUE>::HashValue(const KEY &t_key) const {
  unsigned int digest = 0;
  for (int pos = 0; t_key[pos] != '\0'; ++pos) {
    // equivalent to: digest = 65599 * digest + t_key[pos]
    digest = t_key[pos] + (digest << 6) + (digest << 16) - digest;
  }
  return (digest & 0x7FFFFFFF);
}

你可能感兴趣的:([C++]HashMap实现STL map)