最近在看关于 Clojure 的资料,从 Rich Hickey(Clojure 的创造者)的《Persistent Data Structures and Managed References》中看到了关于 Ideal Hash Trees 的介绍,于是找到了 Phil Bagwell 关于 "Ideal Hash Trees" 的论文,研究了一下,并结合 Rich Hickey 关于 Persistent Ideal Hash Trees 的介绍,用 Java 实现了一个 Persistent Ideal Hash Tree(花了两杯咖啡加一个下午,希望不会有太多的 bug)。下面就是对该数据结构的简单介绍和我的实现细节,供参考。
public class PersistentIdealHashTree<K, V> { //定义分支个数 private static final int CHILD_COUNT = 32; //定义最大层,也可以通过分支个数来计算这里我直接写死了 private static final int MAX_LAYER = 6; //定义根节点,使用一个原子变量,这里也是整棵树发生变化的地方 private final AtomicReference<TreeNode> root; public PersistentIdealHashTree() { this.root = new AtomicReference<TreeNode>(new TreeNode(null, CHILD_COUNT)); } ...... }
private class TreeNode { //节点数据链表 private final NodeData data; //孩子节点,如果为叶子节点,则是一个0长度数组 private final TreeNode[] children; @SuppressWarnings("unchecked") public TreeNode(NodeData data, int childCount) { this.data = data; this.children = new PersistentIdealHashTree.TreeNode[childCount]; } ...... } private class NodeData { //数据key private final K key; //用户数据 private final V userObj; //下一个节点指针 private NodeData next; public NodeData(K key, V userObj, NodeData next) { this.key = key; this.userObj = userObj; this.next = next; } }
public void insert(K key, V userObj) { boolean success = false; while (!success) { TreeNode oldRootNode = root.get(); //insertNode将生成一棵新的树,返回根节点,不会修改原有的树 TreeNode newRootNode = insertNode(oldRootNode, 1, key.hashCode(), key, userObj); //使用原子操作比较并替换老根节点,如果老根节点发生变化,则会失败,导致重做所有操作 success = root.compareAndSet(oldRootNode, newRootNode); } }
private TreeNode insertNode(TreeNode parent, int layer, int keyHashCode, K key, V userObj) { //获取到插入节点在这层中的位置 int position = getNodePosition(layer, keyHashCode); //拷贝一个新的parent节点,只做浅拷贝 TreeNode newParent = cloneTreeNode(parent); if (parent.children.length == 0 || parent.children[position] == null) { // parent是叶子节点,或者插入节点的位置为空,就构造一个新的叶子节点,并插入newParent的对应位置 newParent.children[position] = createTreeLeafNode(key, userObj, null); } else if (parent.children[position].data == null) { // parent是中间节点,则进入下一层操作 newParent.children[position] = insertNode(parent.children[position], layer + 1, keyHashCode, key, userObj); } else { //出现哈希冲突 if (layer < MAX_LAYER) { //由于树还可以继续向下扩展,于是将现在的节点替换为一个中间节点后,进入下一层 TreeNode newChild = CreateTreeParentNode(); newChild.children[getNodePosition(layer + 1, parent.children[position].data.key.hashCode())] = parent.children[position]; newParent.children[position] = insertNode(newChild, layer + 1, keyHashCode, key, userObj); } else { //树已经不能再向下扩展,将插入的节点放入叶子节点的值链表中 newParent.children[position] = createTreeLeafNode(key, userObj, parent.children[position]); } } return newParent; }
/**
 * Looks up the user object stored under {@code key} in the current root.
 *
 * @param key key to look up
 * @return the stored user object, or {@code null} when {@code getNode} finds no match
 */
public V get(K key) {
    TreeNode currentRoot = root.get();
    int hash = key.hashCode();
    return getNode(currentRoot, 1, hash, key);
}
private V getNode(TreeNode parent, int layer, int keyHashCode, K key) { int position = getNodePosition(layer, keyHashCode); if (parent.children.length > 0 && parent.children[position] != null) { if (parent.children[position].data == null) { //中间节点,继续向下查找 return getNode(parent.children[position], layer + 1, keyHashCode, key); } else { // 由于可能存在冲突,因此使用一个循环处理 NodeData data = parent.children[position].data; do { if (data.key.equals(key)) { return data.userObj; } data = data.next; } while (data != null); } } return null; }
public V delete(K key) { boolean success = false; V userObj = null; while (!success) { TreeNode oldRootNode = root.get(); //确认是否存在删除的数据,存在则执行删除操作 userObj = getNode(oldRootNode, 1, key.hashCode(), key); if (userObj != null) { //执行删除操作,返回一个新的root,由于节点回收,可能返回空,这时需要构造一个空的新root TreeNode newRootNode = deleteNode(oldRootNode, 1, key.hashCode(), key); if (newRootNode == null) { newRootNode = CreateTreeParentNode(); } //老的root已经发生变化,需要重做 success = root.compareAndSet(oldRootNode, newRootNode); } else { success = true; } } return userObj; }
private TreeNode deleteNode(TreeNode parent, int layer, int keyHashCode, K key) { int position = getNodePosition(layer, keyHashCode); TreeNode newParent = cloneTreeNode(parent); if (parent.children[position].data == null) { //中间节点,进入下一层操作 newParent.children[position] = deleteNode(parent.children[position], layer + 1, keyHashCode, key); } else { //可能存在哈希冲突,在修改链表时采用copy-on-write方法,不改变原有链表 NodeData data = parent.children[position].data; Stack<NodeData> dataStack = new Stack<>(); while (data != null && !data.key.equals(key)) { dataStack.push(data); data = data.next; } NodeData rootData = data.next; while (!dataStack.empty()) { data = dataStack.pop(); rootData = new NodeData(data.key, data.userObj, rootData); } TreeNode newNode = null; if (rootData != null) { newNode = new TreeNode(rootData, 0); } newParent.children[position] = newNode; } //newParent的所有孩子都为空后就可以回收了 return newParent.isNullNode() ? null : newParent; }