AC自动机(Aho-Corasick automaton)模板

  1 /*

  2 1.对n个字符串构造trie树                        insertWord(node *root, char *word);

  3 2.bfs构造fail指针                    makeFail(node *root);

  4 3.基于以上两点的查询                query(node *root, char *str);

  5 */

  6 #include <stdio.h>

  7 #include <string.h>

  8 #include <queue>

  9 using namespace std;

 // Buffer capacities (written as 50 + 10 and 1000000 + 10 in the original,
 // i.e. a nominal maximum length plus some slack).
 const int N1 = 60;
 const int N2 = 1000010;

 // Keyword currently being inserted; filled by main, read by insertWord.
 char key[N1];

 // Text to be searched; filled by main, read by query.
 char desc[N2];

 // One trie node of the Aho-Corasick automaton over the alphabet 'a'..'z'.
 struct node
 {
     node *next[26]; // child for each letter, NULL when absent
     int cnt;        // number of inserted words that end at this node
     node *fail;     // failure link, NULL until one is assigned

     // A fresh node has no children, no failure link and a zero word count.
     node() : cnt(0), fail(NULL)
     {
         int slot = 26;
         while (slot-- > 0)
             next[slot] = NULL;
     }
 };

 21 void insertWord(node *root)//构造trie树

 22 {

 23     node *cur = root;

 24     int n = strlen(key);

 25     for(int i=0; i<n; ++i)

 26     {

 27         int index = key[i] - 'a';

 28         if(cur->next[index] != NULL)

 29             cur = cur->next[index];

 30         else

 31         {

 32             cur->next[index] = new node();

 33             cur = cur->next[index];

 34         }

 35     }

 36     cur->cnt++;

 37 }

 38 void makeFail(node *root)//构造fail指针

 39 {

 40     queue<node*> q;

 41     q.push(root);

 42     node *cur;

 43     while(!q.empty())

 44     {    

 45         cur = q.front();

 46         q.pop();

 47         for(int i=0; i<26; ++i)

 48         {

 49             if(cur->next[i] != NULL)

 50             {

 51                 if(cur == root)//与root相连的结点,即第二层的结点的fail指针都是root

 52                     cur->next[i]->fail = root;

 53                 else

 54                 {

 55                     node *tmp = cur;

 56                     while(tmp->fail != NULL)// why while?

 57                     {

 58                         if(tmp->fail->next[i] != NULL)

 59                         {

 60                             cur->next[i]->fail = tmp->fail->next[i];

 61                             break;

 62                         }                            

 63                         tmp = tmp->fail;

 64                     }

 65                     if(tmp->fail == NULL)

 66                         cur->next[i]->fail = root;

 67                 }

 68                 q.push(cur->next[i]);

 69             }

 70         }

 71     }

 72 }

 73 int query(node *root)//查询

 74 {

 75     node *cur = root;

 76     node *tmp = NULL;

 77     int i=0,cnt=0;

 78     while(desc[i])

 79     {

 80         int index = desc[i] - 'a';

 81         while(cur!=root && cur->next[index] == NULL)

 82             cur = cur->fail;

 83         if(cur->next[index] != NULL)

 84         {

 85             cur = cur->next[index];

 86             tmp = cur;

 87             while(tmp != root && tmp->cnt!=0)

 88             {

 89                 cnt += tmp->cnt;

 90                 tmp->cnt = 0;

 91                 tmp = tmp->fail;

 92             }

 93         }

 94         i++;

 95     }

 96     return cnt;

 97 }

 98 int main()

 99 {

100     int t,n;

101     scanf("%d",&t);

102     while(t--)

103     {

104         node *root = new node();

105         scanf("%d",&n);

106         for(int i=0; i<n; ++i)

107         {

108             scanf("%s",key);

109             insertWord(root);

110         }

111         makeFail(root);

112         scanf("%s",desc);

113         int ans = query(root);

114         printf("%d\n",ans);

115     }

116     return 0;

117 }
View Code
  1 //多串匹配,n个模式字符串构成AC自动机,然后目标串去匹配,看目标串中有多少个模式串

  2 #include <stdio.h>

  3 #include <string.h>

  4 #include <queue>

  5 using namespace std;

  6 /*

  7 根结点的fail指针为NULL,根结点的直接子结点的fail指针为root,很明显,当一个字符都不匹配时,从根结点再开始匹配

  8 每个结点的fail指针都是由它父结点的fail指针决定的。

  9 */

 // Capacity of the text buffer (written as 1000000 + 10 in the original).
 const int N = 1000010;

 // One trie node of the Aho-Corasick automaton over the alphabet 'a'..'z'.
 struct node
 {
     node *next[26]; // child for each letter, NULL when absent
     node *fail;     // failure link, NULL until one is assigned
     int cnt;        // number of inserted words that end at this node

     // A fresh node has no children, no failure link and a zero word count.
     node() : fail(NULL), cnt(0)
     {
         for (node **slot = next; slot != next + 26; ++slot)
             *slot = NULL;
     }
 };

 17 void insertWord(node *root, char *word)

 18 {

 19     node *cur = root;

 20     int i = 0;

 21     while(word[i])

 22     {

 23         int index = word[i] - 'a';

 24         if(cur->next[index] == NULL)

 25             cur->next[index] = new node();

 26         cur = cur->next[index];

 27         ++i;

 28     }

 29     cur->cnt ++;

 30 }

 31 char str[N];

 32 void makeFail(node *root)

 33 {

 34     node *cur,*tmp;

 35     queue<node*> q;

 36     q.push(root);

 37     while(!q.empty())

 38     {

 39         cur = q.front();

 40         q.pop();

 41         for(int i=0; i<26; ++i)

 42         {

 43             if(cur->next[i] != NULL)

 44             {

 45                 q.push(cur->next[i]);

 46                 if(cur == root)//如果当前结点是root,那么它的直接孩子结点的fail指针指向root

 47                     cur->next[i]->fail = root;

 48                 else

 49                 {

 50                     tmp = cur;

 51                     while(tmp->fail != NULL)//because root->fail == NULL,如果到了这个地步,说明当前字符串没有后缀

 52                     {

 53                         if(tmp->fail->next[i] != NULL)

 54                         {

 55                             cur->next[i]->fail = tmp->fail->next[i];

 56                             break;

 57                         }

 58                         tmp = tmp->fail;

 59                     }

 60                     if(tmp->fail == NULL)

 61                         cur->next[i]->fail = root;

 62                 }

 63             }

 64         }

 65     }

 66 }

 67 

 68 // how to query???

 69 int query(node *root, char *str)

 70 {

 71     node *cur = root;

 72     node *tmp = NULL;

 73     int i = 0, cnt = 0;

 74     while(str[i])

 75     {

 76         int index = str[i] - 'a';

 77         while(cur!=root && cur->next[index]==NULL)//如果失配,那么直接跳到fail指针处去匹配

 78             cur = cur->fail;

 79         if(cur->next[index] != NULL)

 80         {

 81             cur = cur->next[index];//如果当前字符匹配成功,则跳到那个字符,

 82             tmp = cur;

 83             //这就是为什么Ac自动机效率高的缘故,根据fail指针,跳到当前字符串的最长后缀去

 84             //如果tmp->cnt != 0 说明存在该最长后缀形成的字符串

 85             while(tmp->cnt!=0)

 86             {

 87                 cnt += tmp->cnt;

 88                 tmp->cnt = 0;

 89                 tmp = tmp->fail;

 90             }

 91         }

 92         ++i;

 93     }

 94     return cnt;

 95 }

 96 int main()

 97 {

 98     int t,n;

 99     scanf("%d",&t);

100     char word[55];

101     while(t--)

102     {

103         node *root = new node();

104         scanf("%d",&n);

105         for(int i=0; i<n; ++i)

106         {

107             scanf("%s",word);

108             insertWord(root, word);

109         }

110         scanf("%s",str);

111         makeFail(root);

112         int ans = query(root, str);

113         printf("%d\n",ans);

114     }

115     return 0;

给定n个模式串,长度均不超过m,和一个目标串(长度为L),问目标串中包含多少个模式串(可重叠
的)。
暴力算法是一个个模式串去与目标串匹配,时间复杂度是O(n*m*L)
有更好的算法是AC自动机,时间复杂度是O(n*m + L):构建trie树和fail指针需要O(n*m)(字母表大小视为常数);匹配时每读入一个字符深度最多加1,而每次沿fail指针回跳深度至少减1,所以回跳的总次数不超过L,扫描目标串是O(L)

AC自动机分为两步,1.构建trie树。2.构建fail指针。正是这个fail指针将时间复杂度给大大缩小了

fail指针表示匹配失败时,应该跳到哪个结点去继续匹配
fail指针指向trie树中存在的、当前字符串的最长真后缀所对应的结点,比如she的fail指针应该指向he、e或root
(即指向其中存在且最长的那个后缀;若任何后缀都不在trie树中,则指向root)
所以当前结点的fail指针由父结点的fail指针所决定

 

 学习资料:http://www.cppblog.com/menjitianya/archive/2014/07/10/207604.html

你可能感兴趣的:(automation)