字典树 Trie


Trie,又称字典树,前缀树(prefix tree),是一种树形结构,用于保存大量的字符串。



1. 根节点不包含字符,除根节点外每一个节点都只包含一个字符。
2. 从根节点到某一节点,路径上经过的字符连接起来,为该节点对应的字符串。
3. 每个节点的所有子节点包含的字符都不相同。


假设有 abc, abcd, abd, b, bcd, efg, hii 这7个单词,可构建字典树如下:

字典树 Trie_第1张图片





#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <new>
#include <string>
#include <vector>

using namespace std;

// Trie (prefix tree) over the lowercase letters 'a'..'z'.
//
// Words are stored as paths from the root; a node is "colored" (red) when the
// path from the root to it spells a complete stored word. Strings handed back
// to the caller as char* are allocated with malloc() and must be released with
// free().

const int kind = 26; // alphabet size: one child slot per letter 'a'..'z'

// A single trie node.
struct Treenode
{
    char ch;              // character held by this node (' ' for the root)
    bool isColored;       // true when the path root..this node is a stored word
    int count;            // number of non-NULL entries in next[]
    Treenode *next[kind]; // children, indexed by (letter - 'a')
    Treenode *parent;     // parent node; NULL for the root

    // Creates a childless, uncolored node holding `thech` below `par`.
    Treenode(char thech, Treenode *par)
        : ch(thech), isColored(false), count(0), parent(par)
    {
        for (int i = 0; i < kind; i++)
            next[i] = NULL;
    }
};

// Maps a character to its slot in Treenode::next, or -1 when the character is
// outside 'a'..'z'. Every lookup goes through here so that an unexpected
// character can never index next[] out of bounds.
static int childIndex(char c)
{
    const int branch = c - 'a';
    return (branch >= 0 && branch < kind) ? branch : -1;
}

// Returns a malloc()-allocated, NUL-terminated copy of the first `length`
// characters of `source`. Throws std::bad_alloc when the allocation fails.
static char *copyChars(const char *source, size_t length)
{
    char *copy = static_cast<char *>(malloc(length + 1));
    if (copy == NULL)
        throw bad_alloc();
    memcpy(copy, source, length);
    copy[length] = '\0';
    return copy;
}

// Inserts `word` into the trie rooted at `root`.
//
// `root` is taken by reference so that the root node created for an empty
// trie (root == NULL) is visible to the caller. Returns true when the word is
// stored; returns false, leaving the trie untouched, when `word` is NULL or
// contains a character outside 'a'..'z'.
bool insert(Treenode *&root, const char *word)
{
    if (word == NULL)
        return false;

    // Validate first so that a rejected word leaves no partial path behind.
    for (int i = 0; word[i]; i++)
        if (childIndex(word[i]) < 0)
            return false;

    if (root == NULL)
        root = new Treenode(' ', NULL); // the root carries no letter; ' ' is a placeholder

    Treenode *location = root;
    for (int i = 0; word[i]; i++)
    {
        const int branch = childIndex(word[i]);
        if (!location->next[branch])
        {
            location->next[branch] = new Treenode(word[i], location);
            location->count++; // count tracks children, so bump it only for a new child
        }
        location = location->next[branch];
    }
    location->isColored = true; // mark the end of a complete word
    return true;
}

// Looks up `word`. Returns the node that terminates it when the word is
// stored (colored), otherwise NULL.
Treenode *search(Treenode *root, const char *word)
{
    if (root == NULL || word == NULL)
        return NULL;

    Treenode *location = root;
    for (int i = 0; word[i]; i++)
    {
        const int branch = childIndex(word[i]);
        if (branch < 0 || !location->next[branch])
            return NULL;
        location = location->next[branch];
    }
    return location->isColored ? location : NULL;
}

// Returns the longest prefix of `word` that exists as a path in the trie (the
// path does not have to end at a colored node).
//
// The result is a malloc()-allocated string the caller must free(); NULL is
// returned when not even the first character matches or an argument is NULL.
char *longest_prefix(Treenode *root, const char *word)
{
    if (root == NULL || word == NULL)
        return NULL;

    Treenode *location = root;
    size_t matched = 0;
    while (word[matched])
    {
        const int branch = childIndex(word[matched]);
        if (branch < 0 || !location->next[branch])
            break;
        location = location->next[branch];
        matched++;
    }
    if (matched == 0)
        return NULL;
    return copyChars(word, matched);
}

// Depth-first walk below `node` that appends one malloc()-allocated string per
// colored node to `allElement`. `str` is scratch space: the walk writes the
// current path into it starting at index `i`, so it must have room for
// i + (height of the subtree) + 2 characters.
static void collectWords(Treenode *node, char *str, int i, vector<char *> &allElement)
{
    str[i] = node->ch;
    if (node->isColored)
    {
        str[i + 1] = '\0';
        // str[0] is skipped: when the walk starts at i == 0 it holds the
        // starting node's own character, which is not part of the result.
        allElement.push_back(copyChars(str + 1, strlen(str + 1)));
    }
    for (int j = 0; j < kind; j++)
        if (node->next[j] != NULL)
            collectWords(node->next[j], str, i + 1, allElement);
}

// Collects every colored node in the subtree rooted at `root` into
// `allElement` (see collectWords for the `str` / `i` contract) and returns a
// copy of the resulting vector. Each collected string must be free()d by the
// caller. A NULL `root` or `str` collects nothing.
vector<char *> getAll(Treenode *root, char *str, int i, vector<char *> &allElement)
{
    if (root != NULL && str != NULL)
        collectWords(root, str, i, allElement);
    return allElement;
}

// Collects the completions of every stored word that starts with `word` into
// `allElement`. The collected strings do NOT include the prefix itself; the
// caller prepends it when needed. Nothing is collected when the prefix is not
// a path in the trie.
void autocomplete(Treenode *root, const char *word, char *str, int i, vector<char *> &allElement)
{
    if (root == NULL || word == NULL || str == NULL)
        return;

    Treenode *location = root;
    for (int j = 0; word[j]; j++)
    {
        const int branch = childIndex(word[j]);
        if (branch < 0 || !location->next[branch])
            return;
        location = location->next[branch];
    }
    collectWords(location, str, i, allElement);
}

// Removes `word` from the trie if it is stored. The terminating node is
// uncolored, and every node that is thereby left without children and without
// a word of its own is unlinked from its parent and deleted. The root is never
// deleted.
void remove(Treenode *root, const char *word)
{
    Treenode *target = search(root, word);
    if (!target)
        return;

    target->isColored = false;
    while (target->parent != NULL && target->count == 0 && !target->isColored)
    {
        Treenode *parent = target->parent;
        parent->next[childIndex(target->ch)] = NULL;
        parent->count--;
        delete target;
        target = parent;
    }
}

// Prints every stored word in the subtree rooted at `root`, one per line.
// `str` is scratch space with the same contract as in collectWords.
void print(Treenode *root, char *str, int i)
{
    if (root == NULL || str == NULL)
        return;

    str[i] = root->ch;
    if (root->isColored)
    {
        str[i + 1] = '\0';
        puts(str + 1); // skip the root placeholder stored in str[0]
    }
    for (int j = 0; j < kind; j++)
        if (root->next[j] != NULL)
            print(root->next[j], str, i + 1);
}

// Deletes every node of the subtree rooted at `node`.
static void destroyTrie(Treenode *node)
{
    if (node == NULL)
        return;
    for (int j = 0; j < kind; j++)
        destroyTrie(node->next[j]);
    delete node;
}

// Frees every string in `words` and empties the vector.
static void releaseWords(vector<char *> &words)
{
    for (size_t k = 0; k < words.size(); ++k)
        free(words[k]);
    words.clear();
}

// Demo driver: builds a trie from stdin (one word per line, terminated by an
// empty line or end of input), lists its contents, then searches for and
// removes one more word read from stdin.
int main()
{
    Treenode *root = NULL;
    string line;
    size_t longest = 0; // length of the longest stored word, used to size the scratch buffer

    cout << "input the strings to build the trie:\n";
    while (getline(cin, line) && !line.empty())
    {
        if (!insert(root, line.c_str()))
        {
            cerr << "skipped \"" << line << "\": only letters a-z are supported\n";
            continue;
        }
        if (line.size() > longest)
            longest = line.size();
    }

    // Root placeholder + longest word + terminating NUL.
    vector<char> scratch(longest + 2);
    char *str = &scratch[0];

    vector<char *> allElement;
    vector<char *>::iterator pos;

    getAll(root, str, 0, allElement);
    for (pos = allElement.begin(); pos != allElement.end(); ++pos)
        cout << *pos << "\n";
    releaseWords(allElement);

    cout << "所有以ab为前缀的红色结点:\n";
    autocomplete(root, "ab", str, 0, allElement);
    for (pos = allElement.begin(); pos != allElement.end(); ++pos)
        cout << "ab" << *pos << "\n";
    releaseWords(allElement);

    cout << "abcd的最长前缀: ";
    char *prefix = longest_prefix(root, "abcd");
    cout << (prefix != NULL ? prefix : "") << "\n";
    free(prefix);

    cout << "input a string to search: ";
    line.clear(); // a failed read leaves the string untouched, so start from empty
    getline(cin, line);
    if (search(root, line.c_str()) == NULL)
        cout << line << " is not found." << endl;
    else
        cout << line << " is found." << endl;

    remove(root, line.c_str());
    cout << "after delete " << line << endl;
    print(root, str, 0);

    destroyTrie(root);
    return 0;
}


扩展阅读: http://www.cs.mcgill.ca/~cs251/OldCourses/1997/topic7/





