最简Trie的实现
无附加功能,主要提供insert,search和prefixsearch。具体原理请参见http://en.wikipedia.org/wiki/Trie
先来看下trie树的样子:
图片来源wikipedia
1: public class Node {
2: char content;
3: boolean marker;
4: Collection<Node> child;
5: boolean visited;
6:
7: public Node(char c) {
8: child = new LinkedList<Node>();
9: marker = false;
10: content = c;
11: visited = false;
12: }
13:
14: public Node subNode(char c) {
15: if (child != null) {
16: for (Node eachChild : child) {
17: if (eachChild.content == c) {
18: return eachChild;
19: }
20: }
21: }
22: return null;
23: }
24: }
Node是一个trie树的节点,其中content表示节点对应的字符,marker表示是否该节点是否是一个单词的完结节点。child顾名思义是节点的所有子节点集合,visited在遍历时需要,表示是否访问过。
具体trie:
1: public class Trie {
2: private Node root;
3:
4: public Trie() {
5: root = new Node(' ');
6: }
7:
8: public void insert(String s) {
9: Node current = root;
10: if (s.length() == 0) // For an empty character
11: current.marker = true;
12: for (int i = 0; i < s.length(); i++) {
13: Node child = current.subNode(s.charAt(i));
14: if (child != null) {
15: current = child;
16: } else {
17: current.child.add(new Node(s.charAt(i)));
18: current = current.subNode(s.charAt(i));
19: }
20: // Set marker to indicate end of the word
21: if (i == s.length() - 1)
22: current.marker = true;
23: }
24: }
25:
26: public boolean search(String s) {
27: Node current = root;
28: while (current != null) {
29: for (int i = 0; i < s.length(); i++) {
30: if (current.subNode(s.charAt(i)) == null)
31: return false;
32: else
33: current = current.subNode(s.charAt(i));
34: }/*
35: * This means that a string exists, but make sure its a word by
36: * checking its 'marker' flag
37: */
38: if (current.marker == true)
39: return true;
40: else
41: return false;
42: }
43: return false;
44: }
45:
46: public List<String> searchPrefix(String s) {
47:
48: Node current = root;
49: if (current == null) {
50: return null;
51: }
52: List<String> list = new ArrayList<String>();
53: Node endNode = null;
54: StringBuilder endSB = new StringBuilder();
55: for (int i = 0; i < s.length(); i++) {
56: if (current.subNode(s.charAt(i)) == null) {
57: endNode = current;
58: break;
59: } else {
60: current = current.subNode(s.charAt(i));
61: endNode = current;
62: endSB.append(endNode.content);
63: }
64: }
65: if (endNode == null) {
66: return null;
67: }
68: if (endNode.marker == true) {// found as a word
69: list.add(endSB.toString());
70: }
71: // depth-first search the sub branch
72: Stack<Node> stack = new Stack<Node>();
73: stack.push(endNode);
74: while (!stack.isEmpty()) {
75: Node cur = stack.peek();
76: int needCount = cur.child.size();
77: for (Node node : cur.child) {
78: if (!node.visited) {
79: node.visited = true;
80: stack.push(node);
81: endSB.append(node.content);
82: if (node.marker) {
83: list.add(endSB.toString());
84: }
85: break;
86: } else {
87: needCount--;
88: }
89: }
90: if (needCount == 0) {
91: stack.pop();
92: endSB.deleteCharAt(endSB.length()-1);
93: }
94: }
95:
96: return list;
97: }
98:
99: public static void main(String[] args) {
100: Trie trie = new Trie();
101: trie.insert("ball");
102: trie.insert("bat");
103: trie.insert("dead");
104: trie.insert("do");
105: trie.insert("doll");
106: trie.insert("dork");
107: trie.insert("dorm");
108: trie.insert("send");
109: trie.insert("sense");
110: List<String> list = trie.searchPrefix("d");
111: for (String s : list) {
112: System.out.println(s);
113: }
114: }
115: }
其中构建树的时候需要不断的将新词insert到结构中,search可以接受一个词作为输入,返回这个词是否在词典中。如果只是search来判断是否存在,那么hashmap更好的解决这个问题。trie树的最大强项是利用前缀检索,比如想知道一个词典中以固定前缀开始的词有多少,那么prefixsearch可以满足需求。
这棵树可扩展的地方太多太多,现有代码只是一个铺垫,对于任何处理字符的需求,扩展属于自己的特定业务场景的trie是个很不错的选择。
本文的实现是基于http://www.technicalypto.com/2010/04/trie-in-java.html这里实现的,trie树截图也来源于此,在此对作者表示感激。