字典树 Trie

 

Trie,又称字典树,前缀树(prefix tree),是一种树形结构,用于保存大量的字符串。

它的优点是:利用字符串的公共前缀来节约存储空间。查找、插入复杂度为O(n),n为字符串长度。 

它有3个基本性质:

1. 根节点不包含字符,除根节点外每一个节点都只包含一个字符。
2. 从根节点到某一节点,路径上经过的字符连接起来,为该节点对应的字符串。
3. 每个节点的所有子节点包含的字符都不相同。

 

假设有 abc, abcd, abd, b, bcd, efg, hii 这7个单词,可构建字典树如下:

字典树 Trie_第1张图片

 


查找一个字符串时,我们只需从根结点按字符串中字符出现顺序依次往下走。如果到最后字符串结束时,对应的结点标记为红色(即该结点是某个单词的结尾),则该字符串存在;否则不存在。
插入时也只需从根结点往下遍历,碰到已存在的字符结点就往下遍历,否则,建立新结点;最后标记最后一个字符的结点为红色即可。

 

实现:

#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <new>
#include <string>
#include <vector>

// Number of distinct letters supported by the trie (lowercase 'a'..'z' only).
const int kind = 26;

// One node of the trie.
struct Treenode
{
    char ch;               // character stored at this node (' ' for the root)
    bool isColored;        // true when a stored word ends here (a "red" node)
    int count;             // number of non-NULL entries in next[]
    Treenode *next[kind];  // children, indexed by (letter - 'a')
    Treenode *parent;      // parent node, NULL for the root

    Treenode(char thech, Treenode *par)
        : ch(thech), isColored(false), count(0), parent(par)
    {
        for (int i = 0; i < kind; i++)
            next[i] = NULL;
    }
};

// Maps a lowercase letter to its child slot; returns -1 for any other
// character so callers never index next[] out of bounds.
static int letter_index(char c)
{
    return (c >= 'a' && c <= 'z') ? (c - 'a') : -1;
}

// Follows `word` down from `root` and returns the node reached after the last
// character, or NULL when the path does not exist (or an argument is NULL, or
// the word contains an unsupported character).
static Treenode *walk_path(Treenode *root, const char *word)
{
    if (root == NULL || word == NULL)
        return NULL;
    Treenode *location = root;
    for (int i = 0; word[i]; i++)
    {
        const int branch = letter_index(word[i]);
        if (branch < 0 || location->next[branch] == NULL)
            return NULL;
        location = location->next[branch];
    }
    return location;
}

// Inserts `word` into the trie rooted at `root`.
//
// `root` is taken by reference so that the root node created for an empty
// (NULL) trie is visible to the caller. Returns false, leaving the trie
// untouched, when `word` is NULL or contains anything other than 'a'..'z'.
bool insert(Treenode *&root, const char *word)
{
    if (word == NULL)
        return false;
    // Validate first so a rejected word never leaves partial nodes behind.
    for (int i = 0; word[i]; i++)
    {
        if (letter_index(word[i]) < 0)
            return false;
    }

    if (root == NULL)
        root = new Treenode(' ', NULL);  // the root carries no letter; ' ' is a placeholder

    Treenode *location = root;
    for (int i = 0; word[i]; i++)
    {
        const int branch = letter_index(word[i]);
        if (location->next[branch] == NULL)
        {
            location->next[branch] = new Treenode(word[i], location);
            location->count++;  // count only children that were actually created
        }
        location = location->next[branch];
    }
    location->isColored = true;  // mark the end of a stored word
    return true;
}

// Looks up `word`; returns the node where it ends if the word is stored,
// otherwise NULL.
Treenode *search(Treenode *root, const char *word)
{
    Treenode *location = walk_path(root, word);
    if (location != NULL && location->isColored)
        return location;
    return NULL;
}

// Returns the longest leading part of `word` that exists as a path in the trie
// (it need not be a complete stored word), or NULL when not even the first
// character matches.
//
// The result is a heap copy allocated with malloc(); the caller must free() it.
char *longest_prefix(Treenode *root, const char *word)
{
    if (root == NULL || word == NULL)
        return NULL;

    Treenode *location = root;
    std::size_t len = 0;
    while (word[len])
    {
        const int branch = letter_index(word[len]);
        if (branch < 0 || location->next[branch] == NULL)
            break;
        location = location->next[branch];
        len++;
    }
    if (len == 0)
        return NULL;

    char *prefix = static_cast<char *>(std::malloc(len + 1));
    if (prefix == NULL)
        return NULL;
    std::memcpy(prefix, word, len);
    prefix[len] = '\0';
    return prefix;
}

// Depth-first walk below `node`, appending one malloc()'d string per colored
// node to `out`. `str` is the scratch buffer holding the current path; the
// character of `node` itself is written at str[i], and every collected string
// starts at str[1].
static void collect_words(Treenode *node, char *str, int i, std::vector<char *> &out)
{
    str[i] = node->ch;
    if (node->isColored)
    {
        str[i + 1] = '\0';
        const std::size_t len = std::strlen(str + 1);
        char *copy = static_cast<char *>(std::malloc(len + 1));
        if (copy == NULL)
            throw std::bad_alloc();
        std::memcpy(copy, str + 1, len + 1);
        out.push_back(copy);
    }
    for (int j = 0; j < kind; j++)
    {
        if (node->next[j] != NULL)
            collect_words(node->next[j], str, i + 1, out);
    }
}

// Collects every stored word at or below `root` into `allElement` (in
// alphabetical order) and returns a copy of that vector.
//
// `str` is caller-provided scratch space; it must have room for at least
// i + (depth of the subtree) + 2 characters. Each collected string is the text
// from str[1] onwards, so with i == 0 the character of `root` itself is left
// out. Every element is allocated with malloc(); the caller must free() them.
std::vector<char *> getAll(Treenode *root, char *str, int i, std::vector<char *> &allElement)
{
    if (root != NULL && str != NULL && i >= 0)
        collect_words(root, str, i, allElement);
    return allElement;
}

// Collects the completions of `word` into `allElement`. With i == 0 each
// result is only the part that follows the prefix; the caller prepends `word`
// when the full text is needed. Buffer and ownership rules are the same as for
// getAll().
void autocomplete(Treenode *root, const char *word, char *str, int i, std::vector<char *> &allElement)
{
    Treenode *location = walk_path(root, word);
    if (location == NULL || str == NULL || i < 0)
        return;
    collect_words(location, str, i, allElement);
}

// Removes `word` from the trie if it is stored; otherwise does nothing.
// Nodes that no longer lead to any stored word are deleted, walking up towards
// the root. The root node itself is never deleted.
void remove(Treenode *root, const char *word)
{
    Treenode *target = search(root, word);
    if (target == NULL)
        return;
    target->isColored = false;

    while (target->parent != NULL && target->count == 0 && !target->isColored)
    {
        Treenode *par = target->parent;
        par->next[letter_index(target->ch)] = NULL;
        par->count--;
        delete target;
        target = par;
    }
}

// Prints every stored word at or below `root`, one per line, using `str` as
// scratch space (same size requirement and str[1] convention as getAll()).
void print(Treenode *root, char *str, int i)
{
    if (root == NULL || str == NULL || i < 0)
        return;
    str[i] = root->ch;
    if (root->isColored)
    {
        str[i + 1] = '\0';
        std::puts(str + 1);
    }
    for (int j = 0; j < kind; j++)
    {
        if (root->next[j] != NULL)
            print(root->next[j], str, i + 1);
    }
}

// Frees every string in `words` (as produced by getAll()/autocomplete()) and
// empties the vector.
void free_words(std::vector<char *> &words)
{
    for (std::vector<char *>::iterator pos = words.begin(); pos != words.end(); ++pos)
        std::free(*pos);
    words.clear();
}

// Deletes every node of the trie rooted at `root`; accepts NULL.
void destroy_trie(Treenode *root)
{
    if (root == NULL)
        return;
    for (int j = 0; j < kind; j++)
        destroy_trie(root->next[j]);
    delete root;
}

// Define TRIE_NO_MAIN to reuse the routines above without the interactive demo.
#ifndef TRIE_NO_MAIN
int main()
{
    Treenode *root = NULL;
    std::size_t longest = 0;  // length of the longest accepted word
    std::string word;

    std::cout << "input the strings to build the tire:\n";
    while (std::getline(std::cin, word))
    {
        if (word.empty())
            break;  // an empty line ends the input
        if (!insert(root, word.c_str()))
        {
            std::cerr << "skipped \"" << word << "\": only lowercase a-z is supported\n";
            continue;
        }
        if (word.size() > longest)
            longest = word.size();
    }

    // One slot for the start node's character, `longest` letters, one NUL.
    std::vector<char> scratch(longest + 2);
    char *str = &scratch[0];

    std::vector<char *> allElement;
    std::vector<char *>::iterator pos;

    getAll(root, str, 0, allElement);
    for (pos = allElement.begin(); pos != allElement.end(); ++pos)
        std::cout << *pos << '\n';
    free_words(allElement);  // start the next listing from an empty vector

    std::cout << "所有以ab为前缀的红色结点:\n";
    autocomplete(root, "ab", str, 0, allElement);
    for (pos = allElement.begin(); pos != allElement.end(); ++pos)
        std::cout << "ab" << *pos << '\n';
    free_words(allElement);

    //print(root, str, 0);
    std::cout << "abcd的最长前缀: ";
    char *prefix = longest_prefix(root, "abcd");
    if (prefix != NULL)
    {
        std::cout << prefix;
        std::free(prefix);
    }
    std::cout << '\n';

    std::cout << "input a string to search: ";
    std::string ask;
    if (!std::getline(std::cin, ask))
        ask.clear();  // input already exhausted: fall back to the empty string

    if (search(root, ask.c_str()) == NULL)
        std::cout << ask << " is not found." << '\n';
    else
        std::cout << ask << " is found." << '\n';

    remove(root, ask.c_str());
    std::cout << "after delete " << ask << '\n';
    print(root, str, 0);

    destroy_trie(root);
    return 0;
}
#endif  // TRIE_NO_MAIN

 

扩展阅读: http://www.cs.mcgill.ca/~cs251/OldCourses/1997/topic7/

           http://marknelson.us/1996/08/01/suffix-trees/

           http://www.allisons.org/ll/AlgDS/Tree/Suffix/

           http://home.tiac.net/~cri/2007/urtree.html

 

 

你可能感兴趣的:(autocomplete,null,search,iterator,insert,branch)