trie数的实现

      Trie树又称单词查找树,Trie树,是一种树形结构,是一种哈希树的变种。典型应用是用于统计,排序和保存大量的字符串(但不仅限于字符串),所以经常被搜索引擎系统用于文本词频统计。它的优点是:利用字符串的公共前缀来减少查询时间,最大限度地减少无谓的字符串比较,查询效率比哈希表高。

      这里的需求是,给如一组词汇,北京人,北京,武汉,武汉话等等。能够统计“武汉”这个词的词频,输入"武"的时候,能够得到以“武”开头的所有词。实现的代码如下:

package com.dong.util;

import java.util.ArrayList;
import java.util.List;

public class Trie {
    // 根节点
    private final TrieNode root = new TrieNode(' ');

    // 向trie树中插入一个词
    public void insert(String word) {
        if (word == null || word.length() == 0) {
            return;
        }
        String left = word;
        TrieNode cur = root;
        while (left.length() > 0) {
            char toInert = left.charAt(0);
            TrieNode next = null;
            // 如果那个节点不存在的话,将当前节点插入
            if (containCharNode(cur.getChild(), toInert) == null) {
                next = new TrieNode(toInert);
                cur.getChild().add(next);
            } else {
                next = containCharNode(cur.getChild(), toInert);
            }
            cur = next;
            left = left.substring(1);
            if (left.length() == 0) {
                cur.setFreq(cur.getFreq() + 1);
            }
        }
    }

    // 通过前缀查找词,返回包括前缀的所有词。
    public List<String> search(String prefix) {
        List<String> retList = new ArrayList<String>();
        List<String> tempList = new ArrayList<String>();
        if (prefix == null || prefix.length() == 0) {
            return null;
        }
        String left = prefix;
        TrieNode cur = root;
        while (left.length() > 0) {
            char toFind = left.charAt(0);
            TrieNode next = null;
            // 如果那个节点不存在的话,将当前节点插入
            if (containCharNode(cur.getChild(), toFind) == null) {
                return null;
            } else {
                next = containCharNode(cur.getChild(), toFind);
                if (left.length() == 1) {
                    cur = next;
                    break;
                }
            }
            cur = next;
            left = left.substring(1);

        }

        dfs(cur, new ArrayList<Character>(), tempList);
        for (String s : tempList) {
            retList.add(prefix + s);
        }

        if (getFreq(prefix) > 0) {
            retList.add(prefix);

        }
        return retList;
    }

    // 深度搜索一个trieNode节点下的所有的词
    private void dfs(TrieNode root, List<Character> stack, List<String> retList) {
        if (root.getChild().size() == 0) {
            StringBuffer sb = new StringBuffer();
            for (char c : stack) {
                sb.append(c);
            }
            retList.add(sb.toString());

        } else {
            for (TrieNode r : root.getChild()) {
                stack.add(r.getVal());
                dfs(r, stack, retList);
                stack.remove(stack.size() - 1);
            }
        }

    }

    // 查看一个节点的的子节点是否包含一个字符
    public TrieNode containCharNode(List<TrieNode> child, char c) {
        TrieNode ret = null;
        for (TrieNode temp : child) {
            if (temp.getVal() == c) {
                ret = temp;
            }
        }
        return ret;

    }

    // 得到一个词的词频
    public int getFreq(String word) {
        if (word == null || word.length() == 0) {
            return 0;
        }
        String left = word;
        TrieNode cur = root;
        while (left.length() > 0) {
            char toFind = left.charAt(0);
            TrieNode next = null;
            // 如果不存在此节点,返回0
            if (containCharNode(cur.getChild(), toFind) == null) {
                return 0;
            } else {
                next = containCharNode(cur.getChild(), toFind);
                if (left.length() == 1) {
                    cur = next;
                    break;
                }
            }
            cur = next;
            left = left.substring(1);

        }
        return cur.getFreq();
    }

    public static String fill(String prefix, ArrayList<Character> stack) {
        ArrayList<String> retList = new ArrayList<String>();
        StringBuffer sb = new StringBuffer();
        sb.append(prefix);
        for (char c : stack) {
            sb.append(c);
        }
        return sb.toString();

    }

    public static void main(String[] args) {
        Trie t = new Trie();
        String[] wordList = { "我晕", "我晕啊", "我不信啊", "我信了", "也是", "这不对啊", "我晕" };
        for (String word : wordList) {
            t.insert(word);
        }
        System.out.println(t.search("我晕"));

    }
}

class TrieNode {
    // 节点下存放的字符
    private char val;
    // 一个节点下面的子节点
    private List<TrieNode> child;
    // 该词的词频
    private int freq;

    public TrieNode(char val) {
        child = new ArrayList();
        freq = 0;
        this.val = val;

    }

    public char getVal() {
        return val;
    }

    public void setVal(char val) {
        this.val = val;
    }

    public List<TrieNode> getChild() {
        return child;
    }

    public void setChild(List<TrieNode> child) {
        this.child = child;
    }

    public int getFreq() {
        return freq;
    }

    public void setFreq(int freq) {
        this.freq = freq;
    }
}

 

你可能感兴趣的:(trie数的实现)