AC自动机简介:KMP是用于解决单模式串匹配问题, AC自动机用于解决多模式串匹配问题。
精华:设这个节点上的字母为C,沿着它父亲的失败指针走,直到走到一个节点,它的儿子中也有字母为C的节点。然后把当前节点的失败指针指向那个字母也为C的儿子。如果一直走到了root都没找到,那就把失败指针指向root。
如果用KMP来解决多模式串匹配问题,则复杂度为O(n + k * m), 而AC自动机的复杂度为O(n + m + z), z为模式串出现的次数。
学习链接:
http://hi.baidu.com/nialv7/item/ce1ce015d44a6ba7feded52d
http://blog.csdn.net/niushuai666/article/details/7002823
http://www.cnblogs.com/kuangbin/p/3164106.html
题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=2222
思路:AC自动机的入门题,用的是bin牛的模板,统计End数组即可,统计过的需要清0.
#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>

// Counts how many of the inserted keywords occur in a text, using an
// Aho-Corasick automaton (solution for the HDU 2222 link above).

const int MAX_N = (500000 + 500);  // capacity of the node pool
const int SIGMA = 26;              // alphabet: lowercase 'a'..'z'

// Aho-Corasick automaton stored in fixed-size arrays.
//   next[v][c] : child of v on letter c; after Build() it is the full
//                transition function (missing children are redirected).
//   End[v]     : number of keywords ending exactly at v; Query() overwrites
//                it with -1 once the node has been counted.
//   fail[v]    : failure link of v (valid after Build()).
struct Trie {
    int next[MAX_N][SIGMA];
    int End[MAX_N];
    int fail[MAX_N];
    int root, L;  // L = number of nodes allocated so far

    // Allocates a fresh node with no children and returns its index.
    // Precondition: fewer than MAX_N nodes are in use.
    int NewNode()
    {
        for (int c = 0; c < SIGMA; ++c) next[L][c] = -1;
        End[L] = 0;
        return L++;
    }

    // Resets the automaton to a single root node.
    void Init()
    {
        L = 0;
        root = NewNode();
    }

    // Adds one keyword. Inserting the same keyword twice counts it twice.
    // A keyword containing a byte outside 'a'..'z' is ignored (it would
    // otherwise index outside next[][]); nodes created for its valid prefix
    // stay in the trie but carry no keyword mark.
    void Insert(char *str)
    {
        int now = root;
        for (int i = 0; str[i] != '\0'; ++i) {
            const int id = str[i] - 'a';
            if (id < 0 || id >= SIGMA) return;
            if (next[now][id] == -1) next[now][id] = NewNode();
            now = next[now][id];
        }
        ++End[now];
    }

    // Computes failure links breadth-first and fills every missing
    // transition, so that afterwards next[v][c] is defined for all v, c.
    void Build()
    {
        std::queue<int> que;
        fail[root] = root;
        for (int c = 0; c < SIGMA; ++c) {
            if (next[root][c] == -1) {
                next[root][c] = root;
            } else {
                fail[next[root][c]] = root;
                que.push(next[root][c]);
            }
        }
        while (!que.empty()) {
            const int now = que.front();
            que.pop();
            for (int c = 0; c < SIGMA; ++c) {
                if (next[now][c] == -1) {
                    next[now][c] = next[fail[now]][c];
                } else {
                    fail[next[now][c]] = next[fail[now]][c];
                    que.push(next[now][c]);
                }
            }
        }
    }

    // Returns the number of inserted keywords that occur in str; each
    // inserted keyword is counted at most once. Must be called after
    // Build(). The call consumes the counts, so a second Query() on the
    // same automaton returns 0 for keywords already found.
    int Query(char *str)
    {
        int now = root, res = 0;
        for (int i = 0; str[i] != '\0'; ++i) {
            const int id = str[i] - 'a';
            if (id < 0 || id >= SIGMA) {
                // No keyword can span a byte outside the alphabet.
                now = root;
                continue;
            }
            now = next[now][id];
            // Walk the failure chain. A node marked -1 was visited before,
            // and so was everything above it on the chain, so the walk can
            // stop there instead of re-scanning up to the root every time.
            for (int tmp = now; tmp != root && End[tmp] != -1; tmp = fail[tmp]) {
                res += End[tmp];
                End[tmp] = -1;
            }
        }
        return res;
    }
} AC;

int n;
char str[1000000 + 100];

int main()
{
    int Cas;
    if (scanf("%d", &Cas) != 1) return 0;
    while (Cas-- > 0) {
        AC.Init();
        if (scanf("%d", &n) != 1) break;
        for (int i = 1; i <= n; ++i) {
            // Field width = sizeof(str) - 1, so the buffer cannot overflow.
            if (scanf("%1000099s", str) != 1) break;
            AC.Insert(str);
        }
        AC.Build();
        if (scanf("%1000099s", str) != 1) break;
        printf("%d\n", AC.Query(str));
    }
    return 0;
}
题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=2896
思路:和上题差不多,只是用End数组来记录序号而已。
#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>
#include <vector>

// Reports, for every website string, which of the numbered virus patterns
// it contains (solution for the HDU 2896 link above).

const int MAX_N = (100000 + 1000);  // capacity of the node pool
const int SIGMA = 128;              // alphabet: 7-bit ASCII codes

// Aho-Corasick automaton stored in fixed-size arrays.
//   next[v][c] : child of v on byte c; after Build() it is the full
//                transition function.
//   End[v]     : 1-based index of the pattern ending at v, or 0 for none.
//   fail[v]    : failure link of v (valid after Build()).
struct Trie {
    int next[MAX_N][SIGMA];
    int End[MAX_N];
    int fail[MAX_N];
    int root, L;  // L = number of nodes allocated so far

    // Allocates a fresh node with no children and returns its index.
    // Precondition: fewer than MAX_N nodes are in use.
    int NewNode()
    {
        for (int c = 0; c < SIGMA; ++c) next[L][c] = -1;
        End[L] = 0;
        return L++;
    }

    // Resets the automaton to a single root node.
    void Init()
    {
        L = 0;
        root = NewNode();
    }

    // Adds a pattern and tags its final node with `index` (must be non-zero,
    // since 0 means "no pattern"). If the same pattern is inserted twice the
    // later index wins. A pattern containing a byte >= 128 is ignored.
    void Insert(char *str, int index)
    {
        int now = root;
        for (int i = 0; str[i] != '\0'; ++i) {
            // Go through unsigned char: plain char may be signed, and a
            // negative value must never be used as an array index.
            const int id = static_cast<unsigned char>(str[i]);
            if (id >= SIGMA) return;
            if (next[now][id] == -1) next[now][id] = NewNode();
            now = next[now][id];
        }
        End[now] = index;
    }

    // Computes failure links breadth-first and fills every missing
    // transition, so that afterwards next[v][c] is defined for all v, c.
    void Build()
    {
        std::queue<int> que;
        fail[root] = root;
        for (int c = 0; c < SIGMA; ++c) {
            if (next[root][c] == -1) {
                next[root][c] = root;
            } else {
                fail[next[root][c]] = root;
                que.push(next[root][c]);
            }
        }
        while (!que.empty()) {
            const int now = que.front();
            que.pop();
            for (int c = 0; c < SIGMA; ++c) {
                if (next[now][c] == -1) {
                    next[now][c] = next[fail[now]][c];
                } else {
                    fail[next[now][c]] = next[fail[now]][c];
                    que.push(next[now][c]);
                }
            }
        }
    }

    // Appends to `ans` the index of every pattern occurring in str, then
    // leaves `ans` sorted ascending with duplicates removed, so a pattern
    // that occurs several times is reported once. Must be called after
    // Build().
    void Query(char *str, std::vector<int> &ans)
    {
        int now = root;
        for (int i = 0; str[i] != '\0'; ++i) {
            const int id = static_cast<unsigned char>(str[i]);
            if (id >= SIGMA) {
                // No pattern can span a byte outside the alphabet.
                now = root;
                continue;
            }
            now = next[now][id];
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                if (End[tmp]) ans.push_back(End[tmp]);
            }
        }
        std::sort(ans.begin(), ans.end());
        ans.erase(std::unique(ans.begin(), ans.end()), ans.end());
    }
} AC;

int N, M, res;
char str[10000 + 100];

int main()
{
    AC.Init();
    if (scanf("%d", &N) != 1) return 0;
    for (int i = 1; i <= N; ++i) {
        // Field width = sizeof(str) - 1, so the buffer cannot overflow.
        if (scanf("%10099s", str) != 1) break;
        AC.Insert(str, i);
    }
    AC.Build();

    if (scanf("%d", &M) != 1) M = 0;
    res = 0;
    std::vector<int> found;  // reused for every website
    for (int site = 1; site <= M; ++site) {
        if (scanf("%10099s", str) != 1) break;
        found.clear();
        AC.Query(str, found);
        if (found.empty()) continue;
        printf("web %d:", site);
        for (int j = 0; j < (int)found.size(); ++j) printf(" %d", found[j]);
        puts("");
        ++res;
    }
    printf("total: %d\n", res);
    return 0;
}
题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=3065
思路:用一个数组来记录模式串在主串中出现的次数。
#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>

// Counts how many times each pattern occurs in a text, overlapping
// occurrences included (solution for the HDU 3065 link above).

const int MAX_N = (50000 + 500);      // capacity of the node pool
const int SIGMA = 128;                // alphabet: 7-bit ASCII codes
const int MAX_PATTERNS = 1000 + 100;  // capacity of ss[] and num[]

int N, num[MAX_PATTERNS];   // num[i] = occurrences of pattern i in the last query
char ss[MAX_PATTERNS][55];  // the patterns themselves, kept for printing
char str[2000000 + 200];    // the text to scan

// Aho-Corasick automaton stored in fixed-size arrays.
//   next[v][c] : child of v on byte c; after Build() it is the full
//                transition function.
//   End[v]     : 0-based index of the pattern ending at v, or -1 for none.
//   fail[v]    : failure link of v (valid after Build()).
struct Trie {
    int next[MAX_N][SIGMA];
    int End[MAX_N];
    int fail[MAX_N];
    int root, L;  // L = number of nodes allocated so far

    // Allocates a fresh node with no children and returns its index.
    // Precondition: fewer than MAX_N nodes are in use.
    int NewNode()
    {
        for (int c = 0; c < SIGMA; ++c) next[L][c] = -1;
        End[L] = -1;
        return L++;
    }

    // Resets the automaton to a single root node.
    void Init()
    {
        L = 0;
        root = NewNode();
    }

    // Adds a pattern and tags its final node with `index`. If the same
    // pattern is inserted twice the later index wins. A pattern containing
    // a byte >= 128 is ignored.
    void Insert(char *str, int index)
    {
        int now = root;
        for (int i = 0; str[i] != '\0'; ++i) {
            // Go through unsigned char: plain char may be signed, and a
            // negative value must never be used as an array index.
            const int id = static_cast<unsigned char>(str[i]);
            if (id >= SIGMA) return;
            if (next[now][id] == -1) next[now][id] = NewNode();
            now = next[now][id];
        }
        End[now] = index;
    }

    // Computes failure links breadth-first and fills every missing
    // transition, so that afterwards next[v][c] is defined for all v, c.
    void Build()
    {
        std::queue<int> que;
        fail[root] = root;
        for (int c = 0; c < SIGMA; ++c) {
            if (next[root][c] == -1) {
                next[root][c] = root;
            } else {
                fail[next[root][c]] = root;
                que.push(next[root][c]);
            }
        }
        while (!que.empty()) {
            const int now = que.front();
            que.pop();
            for (int c = 0; c < SIGMA; ++c) {
                if (next[now][c] == -1) {
                    next[now][c] = next[fail[now]][c];
                } else {
                    fail[next[now][c]] = next[fail[now]][c];
                    que.push(next[now][c]);
                }
            }
        }
    }

    // Scans str, stores the occurrence count of every pattern in the global
    // num[], and prints one "pattern: count" line (in insertion order) for
    // each of the first N patterns that occurs at least once. Must be
    // called after Build().
    void Query(char *str)
    {
        memset(num, 0, sizeof(num));
        int now = root;
        for (int i = 0; str[i] != '\0'; ++i) {
            const int id = static_cast<unsigned char>(str[i]);
            if (id >= SIGMA) {
                // No pattern can span a byte outside the alphabet.
                now = root;
                continue;
            }
            now = next[now][id];
            // Every node on the failure chain is a suffix of the current
            // prefix, so each marked node is one more occurrence.
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                if (End[tmp] != -1) ++num[End[tmp]];
            }
        }
        for (int i = 0; i < N; ++i) {
            if (num[i]) printf("%s: %d\n", ss[i], num[i]);
        }
    }
} AC;

int main()
{
    // N is read exactly once per test case. Comparing against 1 (rather
    // than testing for EOF only) also stops on a non-numeric token instead
    // of spinning forever.
    while (scanf("%d", &N) == 1) {
        if (N < 0 || N > MAX_PATTERNS) break;  // would overflow ss[] / num[]
        AC.Init();
        bool ok = true;
        for (int i = 0; i < N; ++i) {
            // Field width = sizeof(ss[i]) - 1.
            if (scanf("%54s", ss[i]) != 1) { ok = false; break; }
            AC.Insert(ss[i], i);
        }
        if (!ok) break;
        AC.Build();
        // Field width = sizeof(str) - 1.
        if (scanf("%2000199s", str) != 1) break;
        AC.Query(str);
    }
    return 0;
}
题目链接:http://poj.org/problem?id=2778
思路:需要用到的知识:有向图中从点A到点B恰好走K步的路径数,等于该图邻接矩阵K次幂的第A行第B列元素。然后对于已经建好的Trie图,我们就可以建图了,如果某个节点A不是终止节点并且这个节点的next节点B也不是终止节点,那么就连边(表示从A点走1步到节点B的方法有1种)。建好图之后就是矩阵的快速幂了,然后再统计节点0(根节点)到其余节点走N步的方法数的总和。
#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>

// Counts, modulo MOD, the strings of length N over {A, C, G, T} that contain
// none of the M given patterns: build the Aho-Corasick automaton, turn its
// allowed transitions into a matrix and raise that matrix to the N-th power
// (solution for the POJ 2778 link above).

const int MAX_N = (100 + 10);  // capacity of the node pool / matrix dimension
const int MOD = (100000);
const int SIGMA = 4;           // alphabet: A, C, G, T

int M, N;
char str[22];

// Square matrix of dimension n (n <= MAX_N) with entries kept in [0, MOD).
struct Matrix {
    long long mat[MAX_N][MAX_N];
    int n;

    // Empty 0x0 matrix; entries are left unset.
    Matrix() : n(0) {}

    // n-by-n zero matrix.
    explicit Matrix(int _n) : n(_n)
    {
        for (int i = 0; i < n; ++i)
            for (int j = 0; j < n; ++j) mat[i][j] = 0;
    }

    // Matrix product modulo MOD. Both operands must have the same dimension.
    Matrix operator *(const Matrix &b) const
    {
        Matrix c(n);
        for (int i = 0; i < n; ++i) {
            for (int j = 0; j < n; ++j) {
                for (int k = 0; k < n; ++k) {
                    // Each factor is < MOD, so the product is below 1e10 and
                    // the running sum stays well inside long long.
                    c.mat[i][j] += mat[i][k] * b.mat[k][j];
                    if (c.mat[i][j] >= MOD) c.mat[i][j] %= MOD;
                }
            }
        }
        return c;
    }
};

// Returns mat raised to the n-th power (n >= 0) by binary exponentiation;
// n == 0 yields the identity matrix.
Matrix Pow(Matrix mat, int n)
{
    Matrix result(mat.n);
    for (int i = 0; i < mat.n; ++i) result.mat[i][i] = 1;
    while (n > 0) {
        if (n & 1) result = result * mat;
        n >>= 1;
        if (n > 0) mat = mat * mat;  // skip the unused final squaring
    }
    return result;
}

// Aho-Corasick automaton stored in fixed-size arrays.
//   next[v][c] : child of v on letter c; after Build() it is the full
//                transition function.
//   End[v]     : 1 if reaching v means some pattern has just been matched
//                (after Build() this includes matches via failure links).
//   fail[v]    : failure link of v (valid after Build()).
struct Trie {
    int next[MAX_N][SIGMA];
    int End[MAX_N];
    int fail[MAX_N];
    int L, root;  // L = number of nodes allocated so far

    // Allocates a fresh node with no children and returns its index.
    // Precondition: fewer than MAX_N nodes are in use.
    int NewNode()
    {
        for (int c = 0; c < SIGMA; ++c) next[L][c] = -1;
        End[L] = 0;
        return L++;
    }

    // Resets the automaton to a single root node.
    void Init()
    {
        L = 0;
        root = NewNode();
    }

    // Maps a nucleotide letter to 0..3, or returns -1 for any other char.
    int getID(char ch)
    {
        switch (ch) {
            case 'A': return 0;
            case 'C': return 1;
            case 'G': return 2;
            case 'T': return 3;
            default:  return -1;
        }
    }

    // Adds one forbidden pattern. A pattern containing a char other than
    // A/C/G/T is ignored; nodes created for its valid prefix stay in the
    // trie unmarked, which does not change the counting result.
    void Insert(char *str)
    {
        int now = root;
        for (int i = 0; str[i] != '\0'; ++i) {
            const int id = getID(str[i]);
            if (id < 0) return;
            if (next[now][id] == -1) next[now][id] = NewNode();
            now = next[now][id];
        }
        End[now] = 1;
    }

    // Computes failure links breadth-first, fills every missing transition,
    // and propagates End along failure links.
    void Build()
    {
        std::queue<int> que;
        fail[root] = root;
        for (int c = 0; c < SIGMA; ++c) {
            if (next[root][c] == -1) {
                next[root][c] = root;
            } else {
                fail[next[root][c]] = root;
                que.push(next[root][c]);
            }
        }
        while (!que.empty()) {
            const int now = que.front();
            que.pop();
            // fail[now] is shallower than now and was finalised earlier in
            // the BFS, so its End flag is already complete.
            if (End[fail[now]]) End[now] = 1;
            for (int c = 0; c < SIGMA; ++c) {
                if (next[now][c] == -1) {
                    next[now][c] = next[fail[now]][c];
                } else {
                    fail[next[now][c]] = next[fail[now]][c];
                    que.push(next[now][c]);
                }
            }
        }
    }

    // Returns the L-by-L matrix whose entry (i, j) is the number of letters
    // leading from state i to state j, counting only transitions between
    // states that are not marked End. Must be called after Build().
    Matrix getMatrix()
    {
        Matrix res(L);
        for (int i = 0; i < L; ++i) {
            // Forbidden states are unreachable from the root anyway (no edge
            // enters them); skipping them keeps their rows zero.
            if (End[i]) continue;
            for (int c = 0; c < SIGMA; ++c) {
                if (!End[next[i][c]]) ++res.mat[i][next[i][c]];
            }
        }
        return res;
    }
} AC;

int main()
{
    // Comparing against 2 stops on EOF and on malformed input alike.
    while (scanf("%d %d", &M, &N) == 2) {
        AC.Init();
        for (int i = 1; i <= M; ++i) {
            // Field width = sizeof(str) - 1.
            if (scanf("%21s", str) != 1) return 0;
            AC.Insert(str);
        }
        AC.Build();
        Matrix tmp = AC.getMatrix();
        tmp = Pow(tmp, N);
        // Sum over all end states of the N-step walks that start at the
        // root (node 0).
        long long ans = 0;
        for (int i = 0; i < tmp.n; ++i) {
            ans += tmp.mat[0][i];
            if (ans >= MOD) ans %= MOD;
        }
        printf("%lld\n", ans);
    }
    return 0;
}