JS数据结构和算法 - 字典和散列

前言

集合、字典和散列都可以存储不重复的值。但是集合我们存储的是 value、字典和散列存储的是[键, 值]对。但是字典和散列的实现方式略有不同。

字典（Map）

在字典中存储的是键值对，通过对应的键来查找值。字典也称作映射。key 不会重复。

function Dictionary() {
  let items = {};
  this.has = function(key) {
    return items.hasOwnProperty(key);
  };
  this.set = function(key, value) {
    if (!this.has(key)) {
      items[key] = value;
      return true;
    }
    return false;
  };

  this.remove = function(key) {
    if (this.has(key)) {
      delete items[key];
      return true;
    }
    return false;
  };
  this.clear = function() {
    items = {};
  };
  this.size = function() {
    return Object.keys(items).length;
  };
  this.values = function() {
    return Object.values(items);
  };
  this.keys = function() {
    return Object.keys(items);
  };
  this.get = function(key) {
    return items[key];
  };
  this.getItems = function() {
    return items;
  };
}

散列表

散列算法的作用是尽可能快的在数据结构中找到一个值。

散列函数的作用就是给定一个键值，然后返回在表中的位置。

创建一个散列表

先定义一个 hash 函数，最常见的散列函数 - 'lose lose'散列函数，就是将每个键值中的每个字母的 ASC 值相加

function HashTable() {
  let table = [];
  // 散列函数
  function loseloseHashCode(key) {
    let hash = 0;
    for (let i = 0; i < key.length; i++) {
      hash += key.charCodeAt(i);
    }
    return hash % 37; // 为了得到一个较小的数值，会使用hash值和任意一个数做除法的余数
  }
  this.put = function(key, value) {
    const position = loseloseHashCode(key);
    table[position] = value;
  };
  this.get = function(key) {
    return table[loseloseHashCode(key)];
  };
  this.remove = function(key) {
    table[loseloseHashCode(key)] = undefined;
  };
}

处理散列表中的冲突

有时候，一些键会有相同的散列值。不同的值在散列表中对应相同位置的时候，我们称其为冲突。此时后面的值就是覆盖前面的值。

处理冲突的方式：

分离链接
线性探查
双散列法

分离链接(拉链法)

分离链接法包括为散列表的每一个位置创建一个链表并将元素存储在里面。它是解决冲突的最简单的方法，但是它在 HashTable 实例之外还需要额外的存储空间。每个位置上存储的都是 LinkedList 的实例

对于分离链接和线性探查来说，只需要重写三个方法：put、get 和 remove。这三个方法在每种技术实现中都是不同的。

为了实现一个使用了分离链接的 HashTable 实例，我们需要一个新的辅助类来表示将要加入 LinkedList 实例的元素。我们管它叫 ValuePair 类（在 HashTable 类内部定义）

function HashTable() {
  let table = [];
  // 散列函数
  function loseloseHashCode(key) {
    let hash = 0;
    for (let i = 0; i < key.length; i++) {
      hash += key.charCodeAt(i);
    }
    return hash % 37; // 为了得到一个较小的数值，会使用hash值和任意一个数做除法的余数
  }

  function ValuePair(key, value) {
    this.key = key;
    this.value = value;
    this.toString = function() {
      return `[${key}-${value}]`;
    };
  }

  this.put = function(key, value) {
    const position = loseloseHashCode(key);
    if (table[position] === undefined) {
      table[position] = new LinkedList();
    }
    table[position].append(new ValuePair(key, value));
  };
  this.get = function(key) {
    const position = loseloseHashCode(key);
    if (table[position] !== undefined) {
      let current = table[position].getHead();
      while (current) {
        if (current.value.key === key) {
          return current.value.value;
        }
        current = current.next;
      }
    }
    return undefined;
  };
  this.remove = function(key) {
    const position = loseloseHashCode(key);
    if (table[position] !== undefined) {
      let current = table[position].getHead();
      while (current) {
        if (current.value.key === key) {
          table[position].remove(current.value);
          if (table[position].isEmpty()) {
            table[position] = undefined;
          }
          return true;
        }
        current = current.next;
      }
    }
    return false;
  };
}

线性探查

另一种解决冲突的方法是线性探查。当想向表中某个位置加入一个新元素的时候，如果索引为 index 的位置已经被占据了，就尝试 index+1 的位置。如果 index+1 的位置也被占据了，就尝试 index+2 的位置，以此类推。

function HashTable() {
  let table = [];
  // 散列函数
  function loseloseHashCode(key) {
    let hash = 0;
    for (let i = 0; i < key.length; i++) {
      hash += key.charCodeAt(i);
    }
    return hash % 37; // 为了得到一个较小的数值，会使用hash值和任意一个数做除法的余数
  }

  function ValuePair(key, value) {
    this.key = key;
    this.value = value;
    this.toString = function() {
      return `[${key}-${value}]`;
    };
  }

  this.put = function(key, value) {
    let position = loseloseHashCode(key);
    if (table[position] === undefined) {
      table[position] = new ValuePair(key, value);
    } else {
      let index = ++position;
      while (table[index] !== undefined) {
        index++;
      }
      table[index] = new ValuePair(key, value);
    }
  };

  this.get = function(key) {
    const position = loseloseHashCode(key);
    if (table[position] !== undefined) {
      if (table[position].key === key) {
        return table[position].value;
      } else {
        let index = ++position;
        while (table[index] === undefined || table[index].key !== key) {
          index++;
        }
        if (table[index].key === key) {
          return table[index].value;
        }
      }
    }
    return undefined;
  };
  this.remove = function(key) {
    const position = loseloseHashCode(key);
    if (table[position] !== undefined) {
      if (table[position].key === key) {
        table[position] = undefined;
      } else {
        let index = ++position;
        while (table[index] === undefined || table[index].key !== key) {
          index++;
        }
        if (table[index].key === key) {
          table[index] = undefined;
        }
      }
      return true;
    }
    return false;
  };
}

更好的散列函数

我们实现的“lose lose”散列函数并不是一个表现良好的散列函数，因为它会产生太多的冲突。如果我们使用这个函数的话，会产生各种各样的冲突。一个表现良好的散列函数是由几个方面构成的：插入和检索元素的时间（即性能），当然也包括较低的冲突可能性

另一个可以实现的比“lose lose”更好的散列函数是 djb2：

function djb2HashCode(key) {
  // hash 初始化为一个质数，大多数用的都是5381
  let hash = 5381;
  for (let i = 0; i < key.length; i++) {
    hash = hash * 33 + key.charCodeAt(i);
  }
  return hash % 1013;
}

这并不是最好的散列函数，但这是最被社区推荐的散列函数之一。

常见的三种哈希结构

数组
集合(set)
映射(map)