lTrie原理
Trie的核心思想是空间换时间。利用字符串的公共前缀来降低查询时间的开销以达到提高效率的目的。
lTrie性质
好多人说trie的根节点不包含任何字符信息,我所习惯的trie根节点却是包含信息的,而且认为这样也方便,下面说一下它的性质 (基于本文所讨论的简单trie树)
1. 字符的种数决定每个节点的出度,即branch数组(空间换时间思想)
2. branch数组的下标代表字符相对于a的相对位置
3. 采用标记的方法确定是否为字符串。
4. 插入、查找的复杂度均为O(len),len为字符串长度
lTrie的示意图
如图所示,该trie树存有abc、d、da、dda四个字符串,如果是字符串会在节点的尾部进行标记。没有后续字符的branch分支指向NULL
lTrie的优点举例
已知n个由小写字母构成的平均长度为10的单词,判断其中是否存在某个串为另一个串的前缀子串。下面对比3种方法:
1. 最容易想到的:即从字符串集中从头往后搜,看每个字符串是否为字符串集中某个字符串的前缀,复杂度为O(n^2)。
2. 使用hash:我们用hash存下所有字符串的所有的前缀子串。建立存有子串hash的复杂度为O(n*len)。查询的复杂度为O(n)* O(1)= O(n)。
3. 使用trie:因为当查询如字符串abc是否为某个字符串的前缀时,显然以b,c,d....等不是以a开头的字符串就不用查找了。所以建立trie的复杂度为O(n*len),而建立+查询在trie中是可以同时执行的,建立的过程也就可以成为查询的过程,hash就不能实现这个功能。所以总的复杂度为O(n*len),实际查询的复杂度只是O(len)。
package trie;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
public class Trie {
private Vertex root;
protected class Vertex {
protected int words;
protected int prefixes;
protected Vertex[] edges;
Vertex() {
words = 0;
prefixes = 0;
edges = new Vertex[26];
for (int i = 0; i < edges.length; i++) {
edges[i] = null;
}
}
}
public Trie() {
root = new Vertex();
}
/** */
/**
*
* List all words in the Trie.
*
*
*
* @return
*
*/
public List<String> listAllWords() {
List<String> words = new ArrayList<String>();
Vertex[] edges = root.edges;
for (int i = 0; i < edges.length; i++) {
if (edges[i] != null) {
// System.out.println("test");
String word = "" + (char) ('a' + i);
depthFirstSearchWords(words, edges[i], word);
}
}
return words;
}
public int countPrefixes(String prefix) {
return countPrefixes(root, prefix);
}
private int countPrefixes(Vertex vertex, String prefixSegment) {
if (prefixSegment.length() == 0) { // reach the last character of the
// word
return vertex.prefixes;
}
char c = prefixSegment.charAt(0);
int index = c - 'a';
if (vertex.edges[index] == null) { // the word does NOT exist
return 0;
} else {
return countPrefixes(vertex.edges[index], prefixSegment
.substring(1));
}
}
public int countWords(String word) {
return countWords(root, word);
}
private int countWords(Vertex vertex, String wordSegment) {
if (wordSegment.length() == 0) { // reach the last character of the
// word
return vertex.words;
}
char c = wordSegment.charAt(0);
int index = c - 'a';
if (vertex.edges[index] == null) { // the word does NOT exist
return 0;
} else {
return countWords(vertex.edges[index], wordSegment.substring(1));
}
}
/** */
/**
*
* Depth First Search words in the Trie and add them to the List.
*
*
*
* @param words
*
* @param vertex
*
* @param wordSegment
*
*/
private void depthFirstSearchWords(List<String> words, Vertex vertex,
String wordSegment) {
Vertex[] edges = vertex.edges;
boolean hasChildren = false;
for (int i = 0; i < edges.length; i++) {
if (edges[i] != null) {
hasChildren = true;
String newWord = wordSegment + (char) ('a' + i);
depthFirstSearchWords(words, edges[i], newWord);
}
}
if (!hasChildren) {
words.add(wordSegment);
}
}
/** */
/**
*
* Add a word to the Trie.
*
*
*
* @param word
* The word to be added.
*
*/
public void addWord(String word) {
addWord(root, word);
}
/** */
/**
*
* Add the word from the specified vertex.
*
* @param vertex
* The specified vertex.
*
* @param word
* The word to be added.
*
*/
private void addWord(Vertex vertex, String word) {
if (word.length() == 0) { // if all characters of the word has been
// added
vertex.words++;
} else {
vertex.prefixes++;
char c = word.charAt(0);
c = Character.toLowerCase(c);
int index = c - 'a';
if (vertex.edges[index] == null) { // if the edge does NOT exist
vertex.edges[index] = new Vertex();
}
addWord(vertex.edges[index], word.substring(1)); // go the the
// next
// character
}
}
public static void main(String args[]) // Just used for test
{
Trie trie = new Trie();
trie.addWord("China");
trie.addWord("crawl");
trie.addWord("crime");
trie.addWord("ban");
trie.addWord("english");
trie.addWord("establish");
trie.addWord("eat");
List<String> list = trie.listAllWords();
Iterator listiterator = list.listIterator();
while (listiterator.hasNext())
{
String s = (String) listiterator.next();
System.out.println(s);
}
int count = trie.countPrefixes("c");
System.out.println("the count of c prefixes:" + count);
}
}