/* 朴素字典树,参考多叉树、二叉树的一些操作
* 参考 https://segmentfault.com/a/1190000002451356
* 参考 http://blog.csdn.net/rongdongzhu1/article/details/48416371
* 用链表实现,功能包括创建字典树、填充新的词语、查找存好的字典中是否含有特定词语
* 待完善:1)删改功能没弄;
* 2)中文的“字”需要特殊处理下;
* 3)建立字典树的原始语料,一般在文本文件中,文件读取需要单独写个函数
* 4)可能要求对Trie Tree特殊的查找操作,比如找到含有以“美”开头的所有词
* 5)字典树中也可以含有一些统计特征(TFIDF?),在Trie Tree类中加入新的int变量
* 6)字典树的其它实现方式,比如double-array trie。参考 https://linux.thai.net/~thep/datrie/datrie.html
* http://www.tkl.iis.u-tokyo.ac.jp/~ynaga/cedar/
*/
#include <cstddef>
#include <cstdio>
#include <iostream>
#include <string>
using namespace std;
// One trie node in first-child / next-sibling form.
//   data         - the character stored in this node
//   first_child  - head of the sibling list one level below this node (NULL if none)
//   next_sibling - next node on the same level (NULL at the end of the list)
struct Node
{
    char data;
    struct Node *first_child;
    struct Node *next_sibling;

    // Start every node in a well-defined, unlinked state so that a freshly
    // allocated node never carries indeterminate pointers.
    Node() : data('\0'), first_child(NULL), next_sibling(NULL) {}
};
// Naive trie stored as a first-child / next-sibling tree of Node objects.
//
// Each level is a singly linked sibling list whose first node is a head node
// created by LinkList_InIt() (data == 'S'); the characters of that level are
// appended after it. The object owns every node reachable from PTrieTreeHead
// and releases them in its destructor.
class TrieTree{
public:
    // Head node of the top-level sibling list; allocated by the constructor.
    Node* PTrieTreeHead;

    TrieTree();   // builds an empty trie (just the top-level head node)
    ~TrieTree();  // frees every node reachable from PTrieTreeHead

    // NOTE: declared only. The sole definition in this file is commented out,
    // so a call to this function will not link.
    Node* insertTree(char *ch, Node *parent, Node *pre_sibling);

    // Searches the sibling list starting at pHead for a node holding ch;
    // returns that node, or NULL when there is none.
    Node* LinkList_Find(Node *pHead, char ch);

    // Appends a new node holding ch to the end of the sibling list that
    // contains pt and returns the new node.
    Node* LinkList_Push_Back(Node* pt, char ch);

    // Inserts the first len characters of Ch as a path from the root.
    void TrieTree_Insert(char *Ch, int len);

    // Looks up the first len characters of Ch as a path from the root.
    bool LinkList_Find(char *Ch, int len);

    // Prints the characters of the subtree rooted at root with printf.
    void preOrderTranverse(Node *root);

private:
    // Copying is disabled (declared, never defined): a copy would share
    // PTrieTreeHead with the original and both destructors would delete the
    // same nodes.
    TrieTree(const TrieTree&);
    TrieTree& operator=(const TrieTree&);

    Node* LinkList_InIt();          // allocates the head node of a new sibling list
    bool TrieTree_Clear();          // frees the whole trie; false if there is nothing to free
    void TrieTree_Clear(Node *pt);  // frees pt together with its children and following siblings
};
// Allocates the head node of a new sibling list.
// The head carries the marker character 'S' and has no child and no sibling yet.
// Returns the newly allocated node; the caller is responsible for linking it in.
Node* TrieTree :: LinkList_InIt(){
    Node *head = new Node;
    head->next_sibling = NULL;
    head->first_child = NULL;
    head->data = 'S';
    return head;
}
// Searches a single level (one sibling list) for the character ch.
//   pHead - first node to examine; the scan follows next_sibling links from here
//   ch    - character to look for
// Returns the first node whose data equals ch, or NULL if the list is exhausted
// (which includes the case where pHead itself is NULL).
Node* TrieTree::LinkList_Find(Node *pHead,char ch){
    for (Node *cur = pHead; cur != NULL; cur = cur->next_sibling) {
        if (cur->data == ch) {
            return cur;
        }
    }
    return NULL;
}
// Appends a new node holding ch to the end of a single level (sibling list).
//   pt - any node of the target list; the list is walked to its last node.
//        May be NULL, in which case the new node is returned unlinked so the
//        caller can use it as the start of a list.
//   ch - character to store in the new node
// Returns the newly allocated node (first_child and next_sibling are NULL).
Node * TrieTree :: LinkList_Push_Back(Node* pt, char ch){
    Node *fresh = new Node;
    fresh->data = ch;
    fresh->first_child = NULL;
    fresh->next_sibling = NULL;

    // Nothing to attach to: previously this path dereferenced an
    // uninitialized "previous node" pointer.
    if (pt == NULL) {
        return fresh;
    }

    // Advance to the last node of the list and hang the new node behind it.
    Node *tail = pt;
    while (tail->next_sibling != NULL) {
        tail = tail->next_sibling;
    }
    tail->next_sibling = fresh;
    return fresh;
}
void TrieTree :: TrieTree_Insert(char *Ch, int len){
int i;
Node* pt = PTrieTreeHead,*pf;
for(i=0;i // 同层查找,如果找到字符Ch[i],返回链表中的相关Node
pf = LinkList_Find(pt,Ch[i]);
// 如果当前层没有找到,添加新字符
if(pf == NULL){
// pt移动到插入字符Node的first_child
pt = LinkList_Push_Back(pt,Ch[i])->first_child = LinkList_InIt();
pt->first_child = NULL;
pt->next_sibling = NULL;
}
else{
// 转入下一层
pt = pf->first_child;
}
}
// 将最后一个单独处理
pf = LinkList_Find(pt,Ch[i]);
if(pf == NULL){
pf = LinkList_Push_Back(pt,Ch[i]);
// 务必new一个新链表 后续插入依赖其实现 原则是 要访问到的first_sibling 不为空
// 认为每一个元素都有后继
pf->first_child = LinkList_InIt();
pf->next_sibling = NULL;
}
}
/*
Node * TrieTree :: insertTree(char *ch, Node *parent, Node *pre_sibling) {
Node *child = new Node;
child->data = ch;
if (parent != NULL) parent->first_child = child;
if (pre_sibling != NULL) pre_sibling->next_sibling = child;
child->first_child = NULL;
child->next_sibling = NULL;
return child;
}
*/
bool TrieTree :: LinkList_Find(char *Ch, int len) {
// 根节点
Node* pt = PTrieTreeHead;
for(int i=0;i pt = LinkList_Find(pt,Ch[i]);
if (pt == NULL) return false;
else pt = pt->first_child;
}
pt = LinkList_Find(pt,Ch[len-1]);
if (pt != NULL) return true;
else return true;
}
// Prints the characters of the subtree starting at root in pre-order:
// a node first, then everything below it, then its following siblings.
//   root - node to start from; NULL prints nothing
// Nodes whose data is 'S' (the marker used for list head nodes) are not
// printed. Each printed character is followed by a single space.
void TrieTree :: preOrderTranverse(Node *root) {
    // Siblings are walked in a loop; recursion is only used to descend a level.
    for (Node *cur = root; cur != NULL; cur = cur->next_sibling) {
        if (cur->data != 'S') {
            printf("%c ", cur->data);
        }
        if (cur->first_child != NULL) {
            preOrderTranverse(cur->first_child);
        }
    }
}
// Frees every node of the trie, including the top-level head node.
// Returns false when there is nothing to free (PTrieTreeHead is NULL),
// true after the nodes have been released.
bool TrieTree :: TrieTree_Clear(){
    if(PTrieTreeHead == NULL) return false;
    TrieTree_Clear(PTrieTreeHead);
    // Drop the now-dangling pointer so a repeated call (or any later use of
    // the head) cannot touch freed memory.
    PTrieTreeHead = NULL;
    return true;
}
// Frees pt, everything below it, and every sibling that follows it.
//   pt - first node to release; NULL is accepted and does nothing
// A node's children are always released before the node itself.
void TrieTree :: TrieTree_Clear(Node *pt){
    while (pt != NULL) {
        Node *next = pt->next_sibling;   // must be read before pt is deleted
        if (pt->first_child != NULL) {
            TrieTree_Clear(pt->first_child);
        }
        delete pt;
        pt = next;
    }
}
// Builds an empty trie: the root pointer is set to the head node of the
// (still empty) top-level sibling list.
TrieTree :: TrieTree()
    : PTrieTreeHead(LinkList_InIt())
{
}
// Releases every node owned by the trie when the object goes away.
TrieTree :: ~TrieTree(){
    // Same work as the no-argument TrieTree_Clear(), whose result is not needed here.
    if (PTrieTreeHead != NULL) {
        TrieTree_Clear(PTrieTreeHead);
    }
}
// Demo driver: fills a trie with a few words, prints its contents and then
// looks up one stored word ("nice") and one word that was never inserted
// ("really").
int main() {
    cout << "!!!Hello World!!!" << endl; // prints !!!Hello World!!!
    TrieTree newTrie;

    // std::string -> char*: hand the trie a pointer into the string's own
    // buffer together with an explicit length. No variable-length array and
    // no unterminated copy are needed.
    string str = "nice";
    const int len = static_cast<int>(str.length());
    char *p = &str[0];

    // The trie only reads the characters it is given, so casting away the
    // constness of the literals is safe here.
    newTrie.TrieTree_Insert(const_cast<char*>("BPL"), 3);
    newTrie.TrieTree_Insert(const_cast<char*>("BF"), 2);
    newTrie.TrieTree_Insert(const_cast<char*>("CGM"), 3);
    newTrie.TrieTree_Insert(const_cast<char*>("WX"), 2);
    newTrie.TrieTree_Insert(p, len);
    newTrie.preOrderTranverse(newTrie.PTrieTreeHead);

    const bool tmpb = newTrie.LinkList_Find(p, len);
    // Print through c_str(): "%s" requires a NUL-terminated string.
    if (tmpb) {
        printf("\n Is \'%s\' in the vocabulary? Yes.", str.c_str());
    } else {
        printf("\n Is \'%s\' in the vocabulary? No.", str.c_str());
    }

    // A writable array instead of a char* bound to a literal. sizeof on the
    // array counts the terminating NUL, hence the -1 (sizeof on a pointer
    // would give the pointer size, not the string length).
    char p1[] = "really";
    const int len1 = static_cast<int>(sizeof(p1)) - 1;
    const bool tmpb1 = newTrie.LinkList_Find(p1, len1);
    if (tmpb1) {
        printf("\n Is \'%s\' in the vocabulary? Yes.", p1);
    } else {
        printf("\n Is \'%s\' in the vocabulary? No.", p1);
    }
    return 0;
}