使用伸展树(SplayTree)统计单词频率的Java实现

伸展树(Splay Tree)是特殊的二叉搜索树。

伸展树在每次访问(查询或插入)之后都会进行旋转,使刚刚访问到的节点成为树根;因此越频繁使用的节点越靠近树根,从而加快后续的访问速度。介绍伸展树原理的文章已经很多,在了解二叉搜索树和AVL树之后,伸展树的原理会比较容易理解。本文不再赘述伸展树的原理,而是用伸展树来统计一篇文章中各个单词出现的频率。


1、构造二叉搜索树

二叉搜索树节点:
/**
 * Node of a binary search tree.
 *
 * <p>Stores one element together with the links to its left and right
 * children. The fields are {@code protected} so that tree classes in the same
 * package and node subclasses can read and rewire them directly.
 *
 * @param <T> element type; it must be comparable with itself so that a tree
 *            can order the elements
 * @author cuiods
 */
public class BSTNode<T extends Comparable<T>> {
    /** Element held by this node; may be {@code null} for a placeholder node. */
    protected T data;
    /** Left and right children, or {@code null} when absent. */
    protected BSTNode<T> left, right;

    /** Creates an empty node without data and without children. */
    public BSTNode() {
        this(null, null, null);
    }

    /**
     * Creates a leaf node.
     *
     * @param data element to store
     */
    public BSTNode(T data) {
        this(data, null, null);
    }

    /**
     * Creates a node with explicit children.
     *
     * @param data  element to store
     * @param left  left child, or {@code null}
     * @param right right child, or {@code null}
     */
    public BSTNode(T data, BSTNode<T> left, BSTNode<T> right) {
        this.data = data;
        this.left = left;
        this.right = right;
    }
}
二叉搜索树:
/**
 * Binary Search Tree
 * @author cuiods
 */
public class BSTree> {
    protected BSTNode root;

    public T search(T data) {
        return search(root, data);
    }

    public void insert(T data) {
        BSTNode p = root, prev = null;
        while (p != null) {
            prev = p;
            if (p.data.compareTo(data) < 0) {
                p = p.right;
            } else {
                p = p.left;
            }
        }
        if (root == null) {
            root = new BSTNode(data);
        } else if (prev.data.compareTo(data) < 0) {
            prev.right = new BSTNode(data);
        } else {
            prev.left = new BSTNode(data);
        }
    }

    public void inorder() {
        inorder(root);
    }

    protected void inorder(BSTNode node) {
        if (node != null) {
            inorder(node.left);
            visit(node.data);
            inorder(node.right);
        }
    }

    protected T search(BSTNode p, T data) {
        while (p != null) {
            if (data.equals(p.data)) {
                return data;
            } else if (data.compareTo(p.data) < 0) {
                p = p.left;
            } else {
                p = p.right;
            }
        }
        return null;
    }

    protected void visit(T node) {
        System.out.println(node.toString() + " ");
    }
}

2、构造伸展树

这里主要实现伸展树的插入和搜索方法,其中最核心的是伸展(splay)方法,其实现借鉴自:
http://www.cnblogs.com/skywang12345/p/3604286.html

伸展树节点:
/**
 * node of splay tree
 * @author cuiods
 */
public class SplayTreeNode> extends BSTNode {

    public SplayTreeNode() {
        right = left = null;
    }
    public SplayTreeNode(T data) {
        this(data,null,null);
    }
    public SplayTreeNode(T data, SplayTreeNode left, SplayTreeNode right) {
        this.data = data;
        this.left = left;
        this.right = right;
    }
}
伸展树:
/**
 * splay tree implement
 * http://www.cnblogs.com/skywang12345/p/3604286.html
 * @author cuiods
 */
public abstract class SplayTree> extends BSTree {

    /**
     * 处理插入时遇到相同的节点
     * @param data 插入时已经存在的节点
     */
    protected abstract void handleSame(T data);

    @Override
    public void insert(T key) {
        SplayTreeNode z=new SplayTreeNode(key);

        // 插入节点
        root = insert((SplayTreeNode) root, z);
        // 将节点(key)旋转为根节点
        root = splay((SplayTreeNode) root, key);
    }

    @Override
    public T search(T key) {
        T result = super.search(key);
        splay(key);
        return result;
    }

    private void splay(T key) {
        root = splay((SplayTreeNode) root, key);
    }


    /*
    * 旋转key对应的节点为根节点,并返回根节点。
    *
    * 注意:
    *   (a):伸展树中存在"键值为key的节点"。
    *          将"键值为key的节点"旋转为根节点。
    *   (b):伸展树中不存在"键值为key的节点",并且key < tree.key。
    *      b-1 "键值为key的节点"的前驱节点存在的话,将"键值为key的节点"的前驱节点旋转为根节点。
    *      b-2 "键值为key的节点"的前驱节点不存在的话,则意味着,key比树中任何键值都小,那么此时,将最小节点旋转为根节点。
    *   (c):伸展树中不存在"键值为key的节点",并且key > tree.key。
    *      c-1 "键值为key的节点"的后继节点存在的话,将"键值为key的节点"的后继节点旋转为根节点。
    *      c-2 "键值为key的节点"的后继节点不存在的话,则意味着,key比树中任何键值都大,那么此时,将最大节点旋转为根节点。
    */
    private SplayTreeNode splay(SplayTreeNode tree, T data) {
        if (tree == null)
            return null;

        SplayTreeNode N = new SplayTreeNode();
        SplayTreeNode l = N;
        SplayTreeNode r = N;
        SplayTreeNode c;

        for (;;) {
            int cmp = data.compareTo(tree.data);
            if (cmp < 0) {
                if (tree.left == null)
                    break;
                if (data.compareTo(tree.left.data) < 0) {
                    c = (SplayTreeNode) tree.left;                           /* rotate right */
                    tree.left = c.right;
                    c.right = tree;
                    tree = c;
                    if (tree.left == null)
                        break;
                }
                r.left = tree;                               /* link right */
                r = tree;
                tree = (SplayTreeNode) tree.left;
            } else if (cmp > 0) {
                if (tree.right == null)
                    break;
                if (data.compareTo(tree.right.data) > 0) {
                    c = (SplayTreeNode) tree.right;                          /* rotate left */
                    tree.right = c.left;
                    c.left = tree;
                    tree = c;
                    if (tree.right == null)
                        break;
                }
                l.right = tree;                              /* link left */
                l = tree;
                tree = (SplayTreeNode) tree.right;
            } else {
                break;
            }
        }
        l.right = tree.left;                                /* assemble */
        r.left = tree.right;
        tree.left = N.right;
        tree.right = N.left;
        return tree;
    }

    /*
    * 将结点插入到伸展树中,并返回根节点
    *
    * 参数说明:
    *     tree 伸展树的
    *     z 插入的结点
    */
    private SplayTreeNode insert(SplayTreeNode tree, SplayTreeNode z) {
        int cmp;
        SplayTreeNode y = null;
        SplayTreeNode x = tree;

        // 查找z的插入位置
        while (x != null) {
            y = x;
            cmp = z.data.compareTo(x.data);
            if (cmp < 0)
                x = (SplayTreeNode) x.left;
            else if (cmp > 0)
                x = (SplayTreeNode) x.right;
            else {
                handleSame(z.data);
                return tree;
            }
        }

        if (y==null)
            tree = z;
        else {
            cmp = z.data.compareTo(y.data);
            if (cmp < 0)
                y.left = z;
            else
                y.right = z;
        }

        return tree;
    }


}

3、统计单词频率

特殊的单词伸展树:
/**
 * Splay tree of {@link Word}s used to count word frequencies.
 *
 * @author cuiods
 */
public class WordSplay extends SplayTree<Word> {

    /** Totals gathered by {@link #visit} during the in-order traversal. */
    private int differentWords, wordCount;

    /**
     * Increments the frequency counter of the given word.
     *
     * @param data word whose counter is increased by one
     */
    @Override
    protected void handleSame(Word data) {
        data.setFreq(data.getFreq() + 1);
    }

    /**
     * Prints the word through the inherited hook and adds it to the totals.
     *
     * @param word word being visited
     */
    @Override
    protected void visit(Word word) {
        super.visit(word);
        differentWords++;
        wordCount += word.getFreq();
    }

    /**
     * Reads the stream, inserts every maximal run of letters as an
     * upper-cased word, then prints all words in order followed by a summary.
     *
     * <p>Each value returned by {@code read()} is treated as one character.
     * An {@link IOException} ends reading early; the words collected so far
     * are still reported. The stream is not closed here.
     *
     * @param inputStream source to read from
     * @param fileName    name shown in the summary line
     */
    public void run(InputStream inputStream, String fileName) {
        try {
            int ch = inputStream.read();
            while (ch > -1) {
                // Skip everything up to the next letter.
                while (ch > -1 && !Character.isLetter((char) ch)) {
                    ch = inputStream.read();
                }
                if (ch == -1) {
                    break;
                }
                // Collect one word; StringBuilder avoids quadratic copying.
                StringBuilder s = new StringBuilder();
                while (ch > -1 && Character.isLetter((char) ch)) {
                    s.append(Character.toUpperCase((char) ch));
                    ch = inputStream.read();
                }
                insert(new Word(s.toString()));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Start the totals from zero so a repeated run() does not add the
        // previous traversal's numbers on top.
        differentWords = 0;
        wordCount = 0;
        inorder();
        System.out.println("\nFile "+fileName+" contains "+ wordCount + " words among which "+differentWords+" are different.\n");
    }
}

单词类:

/**
 * @author cuiods
 */
public class Word implements Comparable{

    private String word;
    private int freq = 1;

    public Word(String w) {
        word = w;
    }

    public String getWord() {
        return word;
    }

    public void setWord(String word) {
        this.word = word;
    }

    public int getFreq() {
        return freq;
    }

    public void setFreq(int freq) {
        this.freq = freq;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        Word word1 = (Word) o;

        return word != null ? word.equals(word1.word) : word1.word == null;

    }

    @Override
    public int compareTo(Word o) {
        return word.compareTo(o.word);
    }

    @Override
    public String toString() {
        return "Word{" +
                "word='" + word + '\'' +
                '}';
    }
}

使用单词伸展树统计:
/**
 * Command-line entry point for the word-frequency splay tree.
 */
public class Main {
    /**
     * Counts the words of a file and prints the result.
     *
     * @param args optional; {@code args[0]} is the file to read. When no
     *             argument is given the file name is read from standard
     *             input.
     */
    public static void main(String[] args) {
        String fileName;
        if (args.length == 0) {
            System.out.print("Enter a file name: ");
            // Deliberately not closed: closing the Scanner would close System.in.
            Scanner scanner = new Scanner(System.in);
            fileName = scanner.nextLine();
        } else {
            fileName = args[0];
        }
        // try-with-resources closes the file even if counting fails.
        try (InputStream inputStream = new FileInputStream(fileName)) {
            WordSplay splay = new WordSplay();
            splay.run(inputStream, fileName);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}





你可能感兴趣的:(数据结构)