C++ 高级数据结构——Trie树

trie树,又称字典树或前缀树,是一种有序的、用于统计、排序和存储字符串的数据结构。它与二叉查找树不同,关键字不是直接保存在节点中,而是由节点在树中的位置决定。
一个节点的所有子孙都有相同的前缀,也就是这个节点对应的字符串,而根节点对应空字符串。一般情况下,不是所有的节点都有对应的值,只有叶子节点和部分内部节点所对应的键才有相关的值。
trie树的最大优点就是利用字符串的公共前缀来减少存储空间与查询时间,从而最大限度地减少无谓的字符串比较,是非常高效的字符串查找数据结构。

#include <stdio.h>
// Number of child slots per node: one for each lowercase letter 'a'..'z'.
#define TRIE_MAX_CHAR_NUM 26
// One node of the trie.
//  child  - slot k points to the node reached by the letter 'a' + k,
//           or is null when no such edge exists.
//  is_end - true when the path from the root to this node spells a stored word.
struct TrieNode
{
 TrieNode* child[TRIE_MAX_CHAR_NUM];
 bool is_end;

 // Value-initialising the array in the initializer list nulls every slot,
 // so a fresh node has no children and does not terminate a word.
 TrieNode() : child(), is_end(false)
 {
 }
};
// Pre-order dump of the subtree hanging below `node`.
//
// For every existing child, in alphabetical slot order, one line is printed:
// `layer` copies of "---", the child's letter, and "(end)" when that child
// terminates a word. The child's own subtree follows, one layer deeper.
//
//  node  - subtree root; must not be null (it is dereferenced unchecked).
//  layer - depth of `node`, i.e. how many "---" groups prefix its children.
void preorder_trie(TrieNode* node, int layer)
{
 for (int slot = 0; slot < TRIE_MAX_CHAR_NUM; ++slot)
 {
  TrieNode* next = node->child[slot];
  if (!next)
  {
   continue; // no edge for this letter
  }

  // Indentation: one "---" per level above this child.
  for (int left = layer; left > 0; --left)
  {
   printf("---");
  }

  printf("%c", slot + 'a');
  if (next->is_end)
  {
   printf("(end)");
  }
  printf("\n");

  preorder_trie(next, layer + 1);
 }
}
int main()
{
     
 TrieNode root;
 TrieNode n1;
 TrieNode n2;
 TrieNode n3;
 root.child['a' - 'a'] = &n1;
 root.child['b' - 'a'] = &n2;
 root.child['e' - 'a'] = &n3;
 n2.is_end = true;
 TrieNode n4;
 TrieNode n5;
 TrieNode n6;
 n1.child['b' - 'a'] = &n4;
 n2.child['c' - 'a'] = &n5;
 n3.child['f' - 'a'] = &n6;
 TrieNode n7;
 TrieNode n8;
 TrieNode n9;
 TrieNode n10;
 n4.child['c' - 'a'] = &n7;
 n4.child['d' - 'a'] = &n8;
 n5.child['d' - 'a'] = &n9;
 n6.child['g' - 'a'] = &n10;
 n7.is_end = true;
 n8.is_end = true;
 n9.is_end = true;
 n10.is_end = true;
 TrieNode n11;
 n7.child['d' - 'a'] = &n11;
 n11.is_end = true;
 preorder_trie(&root, 0);
 return 0;
}

运行结果为:

a
---b
------c(end)
---------d(end)
------d(end)
b(end)
---c
------d(end)
e
---f
------g(end)

对Trie树的整体测试

#include <stdio.h>
#include <string>
#include <vector>
// Number of child slots per node: one for each lowercase letter 'a'..'z'.
#define MAX_TRIE_CHAR_NUM 26
// One node of the trie.
//  child  - slot k points to the node reached by the letter 'a' + k,
//           or is null when no such edge exists.
//  is_end - true when the path from the root to this node spells a stored word.
struct TrieNode
{
 TrieNode* child[MAX_TRIE_CHAR_NUM];
 bool is_end;

 // Value-initialising the array in the initializer list nulls every slot,
 // so a fresh node has no children and does not terminate a word.
 TrieNode() : child(), is_end(false)
 {
 }
};
// Collects every word stored below `node`, depth first in alphabetical order.
//
//  node     - subtree root; must not be null (it is dereferenced unchecked).
//  word     - letters on the path from the trie root to `node`. It is extended
//             while descending and restored to its incoming value on return.
//  wordlist - receives a copy of `word` for every descendant with is_end set.
void get_all_word_from_trie(TrieNode* node, std::string& word, std::vector<std::string>& wordlist)
{
 for (int slot = 0; slot < MAX_TRIE_CHAR_NUM; ++slot)
 {
  TrieNode* next = node->child[slot];
  if (!next)
  {
   continue; // no edge for this letter
  }

  word.push_back(static_cast<char>(slot + 'a'));
  if (next->is_end)
  {
   wordlist.push_back(word);
  }
  get_all_word_from_trie(next, word, wordlist);

  // Backtrack: drop the letter appended for this child.
  word.resize(word.length() - 1);
 }
}
// Pre-order dump of the subtree hanging below `node`.
//
// For every existing child, in alphabetical slot order, one line is printed:
// `layer` copies of "---", the child's letter, and "(end)" when that child
// terminates a word. The child's own subtree follows, one layer deeper.
//
//  node  - subtree root; must not be null (it is dereferenced unchecked).
//  layer - depth of `node`, i.e. how many "---" groups prefix its children.
void preorder_trie(TrieNode* node, int layer)
{
 for (int slot = 0; slot < MAX_TRIE_CHAR_NUM; ++slot)
 {
  TrieNode* next = node->child[slot];
  if (!next)
  {
   continue; // no edge for this letter
  }

  // Indentation: one "---" per level above this child.
  for (int left = layer; left > 0; --left)
  {
   printf("---");
  }

  printf("%c", slot + 'a');
  if (next->is_end)
  {
   printf("(end)");
  }
  printf("\n");

  preorder_trie(next, layer + 1);
 }
}
class TrieTree
{
     
public:
 TrieTree() {
     }
 ~TrieTree()
 {
     
  for (int i = 0; i < _node_vec.size(); i++)
  {
     
   delete _node_vec[i];
  }
 }
 TrieNode _root;
 void insert(const char* word)
 {
     
  TrieNode* ptr = &_root;
  while (*word)
  {
     
   int pos = *word - 'a';
   if (!ptr->child[pos])
   {
     
    ptr->child[pos] = new_node();
   }
   ptr = ptr->child[pos];
   word++;
  }
  ptr->is_end = true;
 }
 bool search(const char* word)
 {
     
  TrieNode* ptr = &_root;
  while (*word)
  {
     
   int pos = *word - 'a';
   if (!ptr->child[pos])
   {
     
    return false;
   }
   ptr = ptr->child[pos];
   word++;
  }
  return ptr->is_end;
 }
 bool startsWith(const char* prefix)
 {
     
  TrieNode* ptr = &_root;
  while (*prefix)
  {
     
   int pos = *prefix - 'a';
   if (!ptr->child[pos])
   {
     
    return false;
   }
   ptr = ptr->child[pos];
   prefix++;
  }
  return true;
 }
private:
 TrieNode* new_node()
 {
     
  TrieNode* node = new TrieNode();
  _node_vec.push_back(node);
  return node;
 }
 std::vector<TrieNode*> _node_vec;
};
// Demo: fills a TrieTree, dumps it, lists all stored words, then runs a few
// exact-match (search) and prefix (startsWith) queries.
int main()
{
 TrieTree trie_tree;

 // Words to store, in insertion order.
 const char* const words[] = { "abcd", "abc", "abd", "b", "bcd", "efg" };
 const int word_count = sizeof(words) / sizeof(words[0]);
 for (int k = 0; k < word_count; ++k)
 {
  trie_tree.insert(words[k]);
 }

 // Structure dump.
 printf("preorder_trie:\n");
 preorder_trie(&trie_tree._root,0);
 printf("\n");

 // Every stored word, in the order the traversal collects them.
 std::vector<std::string> word_list;
 std::string word;
 printf("All words:\n");
 get_all_word_from_trie(&trie_tree._root, word, word_list);
 for (std::vector<std::string>::const_iterator it = word_list.begin(); it != word_list.end(); ++it)
 {
  printf("%s\n",it->c_str());
 }
 printf("\n");

 // Exact-match queries.
 printf("Search:\n");
 printf("abc:  %d\n",trie_tree.search("abc"));
 printf("abcd:  %d\n",trie_tree.search("abcd"));
 printf("bc:  %d\n",trie_tree.search("bc"));
 printf("b:  %d\n",trie_tree.search("b"));
 printf("\n");

 // Prefix queries.
 printf("ab:  %d\n", trie_tree.startsWith("ab"));
 printf("abc:  %d\n", trie_tree.startsWith("abc"));
 printf("bc:  %d\n", trie_tree.startsWith("bc"));
 printf("fg:  %d\n", trie_tree.startsWith("fg"));
 return 0;
}

运行结果:

preorder_trie:
a
---b
------c(end)
---------d(end)
------d(end)
b(end)
---c
------d(end)
e
---f
------g(end)

All words:
abc
abcd
abd
b
bcd
efg

Search:
abc:  1
abcd:  1
bc:  0
b:  1

ab:  1
abc:  1
bc:  1
fg:  0

你可能感兴趣的:(数据结构,字符串,高级数据结构,Trie树)