2010-06-21 20:10:05 关于Aho-Corasick (AC) 自动机的三道题目
昨天学习了一下AC自动机是什么东西,做了三道题至于AC自动机的详细解释和资料可以通过搜索 Aho Corasick 找到很多相关论文和资料,我就不要班门弄斧了。
第一道hdu 2222,AC自动机的基本是用方法,直接应用在多模式匹配上。
第二道pku 2778 利用AC自动机减少状态量,之后将状态转移做成矩阵,利用矩阵的二分乘法得到log级别的复杂度。
第三道pku 3691 使用同样是利用AC自动机减少状态量,由于状态数太大不可能实现的dp,变成可能。
下面是后两个题的代码。
pku2778
1 const int MOD = 100000;
2
3 struct L
4 {
5 char c;
6 int stat;
7 bool isend;
8 L* next[4], *fail;
9 L () { isend = 0,memset(next, 0, sizeof(next)), fail = NULL;}
10 }pool[100];
11
12 int top;
13 L * newL()
14 {
15 L * t = &pool[top];
16 t->stat = top++;
17 return t;
18 }
19
20 int c2i(char t)
21 {
22 if (t == 'A') { return 0; }
23 if (t == 'C') { return 1; }
24 if (t == 'G') { return 2; }
25 if (t == 'T') { return 3; }
26 return -1;
27 }
28
29 class ACAutomata
30 {
31 public:
32 L * root;
33 void init()
34 {
35 memset(pool, 0, sizeof(pool));
36 top = 0;
37 root = newL();
38 }
39 ACAutomata(){ init();}
40
41 void ins(char *s)
42 {
43 L * r = root;
44 for (;*s;s++) {
45 int t = c2i(*s);
46 if (r-> next[t] == NULL) {
47 r-> next[t] = newL();
48 }
49 r = r-> next[t];
50 r-> c = *s;
51 }
52 r->isend = true;
53 }
54
55 void pr()
56 {
57 int i, j;
58 queue<L*> que;
59 que.push(root);
60 while (!que.empty()) {
61 int sz = que.size();
62 while (sz--) {
63 L* r = que.front();que.pop();
64 if (r == NULL) {
65 putchar('*');
66 continue;
67 }
68 printf("[%d, %d", r->stat, r->fail->stat);
69 if (r->isend) {
70 printf(" #] ");
71 }else {
72 printf(" ] ");
73 }
74 for (i = 0;i < 4;i++) {
75 if (r->next[i]) {
76 que.push(r->next[i]);
77 }
78 }
79 que.push(NULL);
80 }
81 putchar(10);
82 }
83 }
84
85 void finish()
86 {
87 int i;
88 queue<L*> que;
89 root->fail = root;
90 for (i = 0;i < 4;i++) {
91 if (root-> next[i]) {
92 que.push(root-> next[i]);
93 root-> next[i]-> fail = root;
94 }
95 }
96 while (!que.empty()) {
97 L *r = que.front();que.pop();
98 for (i = 0;i < 4;i++) {
99 if (r->next[i]) {
100 L *u = r-> next[i]; que.push(u);
101 L *v = r-> fail;
102 while (v != root && v-> next[i] == NULL) { v = v-> fail; }
103 if (v-> next[i]) {
104 u-> fail = v-> next[i];
105 }else {
106 u-> fail = root;
107 }
108 u->isend |= u->fail->isend;
109 }
110 }
111 }
112 }
113 }AC;
114
115 int m, n, dim;
116 char s[51];
117
118 struct Matrix {
119 LL a[100][100];
120 Matrix() { memset(a, 0, sizeof(a)); }
121 LL * operator [](int idx) {
122 return a[idx];
123 }
124 void pr()
125 {
126 int i, j;
127 for (i = 0;i < dim;i++) {
128 for (j = 0;j < dim;j++) {
129 printf("%lld ", a[i][j]);
130 }
131 putchar(10);
132 }
133 putchar(10);
134 }
135 }fac, r;
136
137 void makeMatrix()
138 {
139 int i,j;
140 queue<L*> que;
141 que.push(AC.root);
142 while (!que.empty()) {
143 int sz = que.size();
144 while (sz--) {
145 L* r = que.front();que.pop();
146 int u = r->stat;
147 int cnt = 0;
148 for (i = 0;i < 4;i++) {
149 if (r->next[i]) {
150 if (!r->next[i]->isend) {
151 int v = r->next[i]->stat;
152 fac[v][u] ++;
153 que.push(r->next[i]);
154 }
155 }else {
156 L * cur = r->fail;
157 while (cur != AC.root && cur->next[i] == NULL) { cur = cur->fail; }
158 if (cur->next[i]) {
159 cur = cur->next[i];
160 }
161 if (!cur->isend) {
162 int v = cur->stat;
163 fac[v][u] ++;
164 }
165 }
166 }
167 }
168 }
169 dim = top;
170 for (i = 0;i < dim;i++) {
171 for (j = 0;j < dim;j++) {
172 fac[i][j] %= MOD;
173 }
174 }
175 }
176
177 Matrix mul(Matrix a, Matrix b)
178 {
179 int i,j,k;
180 Matrix r;
181 for (i = 0;i < dim;i++) {
182 for (j = 0;j < dim;j++) {
183 for (k = 0;k < dim;k++) {
184 r[i][j] = (r[i][j] + (a[i][k] * b[k][j]) % MOD ) % MOD;
185 }
186 }
187 }
188 return r;
189 }
190
191 Matrix pow(const Matrix &a, int times)
192 {
193 if (times == 1) {
194 return a;
195 }
196 Matrix r = pow(a, times >> 1);
197 r = mul(r, r);
198 if (times & 1) {
199 r = mul(r, a);
200 }
201 return r;
202 }
203
204 int main()
205 {
206 int i, j, k;
207 scanf("%d %d", &m, &n);
208 while (m--) {
209 scanf("%s", s);
210 AC.ins(s);
211 }
212 AC.finish();
213 if (Debug) {
214 AC.pr();
215 }
216 makeMatrix();
217 if (Debug) {
218 fac.pr();
219 }
220 Matrix r = pow(fac, n);
221
222 int ans = 0;
223 for (i = 0;i < dim;i++) {
224 ans = (ans + r[i][0]) % MOD;
225 }
226 printf("%d\n", ans);
227 return 0;
228 }
229
1 const int MOD = 100000;
2
3 struct L
4 {
5 char c;
6 int stat;
7 bool isend;
8 L* next[4], *fail;
9 L () { isend = 0,memset(next, 0, sizeof(next)), fail = NULL;}
10 }pool[100];
11
12 int top;
13 L * newL()
14 {
15 L * t = &pool[top];
16 t->stat = top++;
17 return t;
18 }
19
20 int c2i(char t)
21 {
22 if (t == 'A') { return 0; }
23 if (t == 'C') { return 1; }
24 if (t == 'G') { return 2; }
25 if (t == 'T') { return 3; }
26 return -1;
27 }
28
29 class ACAutomata
30 {
31 public:
32 L * root;
33 void init()
34 {
35 memset(pool, 0, sizeof(pool));
36 top = 0;
37 root = newL();
38 }
39 ACAutomata(){ init();}
40
41 void ins(char *s)
42 {
43 L * r = root;
44 for (;*s;s++) {
45 int t = c2i(*s);
46 if (r-> next[t] == NULL) {
47 r-> next[t] = newL();
48 }
49 r = r-> next[t];
50 r-> c = *s;
51 }
52 r->isend = true;
53 }
54
55 void pr()
56 {
57 int i, j;
58 queue<L*> que;
59 que.push(root);
60 while (!que.empty()) {
61 int sz = que.size();
62 while (sz--) {
63 L* r = que.front();que.pop();
64 if (r == NULL) {
65 putchar('*');
66 continue;
67 }
68 printf("[%d, %d", r->stat, r->fail->stat);
69 if (r->isend) {
70 printf(" #] ");
71 }else {
72 printf(" ] ");
73 }
74 for (i = 0;i < 4;i++) {
75 if (r->next[i]) {
76 que.push(r->next[i]);
77 }
78 }
79 que.push(NULL);
80 }
81 putchar(10);
82 }
83 }
84
85 void finish()
86 {
87 int i;
88 queue<L*> que;
89 root->fail = root;
90 for (i = 0;i < 4;i++) {
91 if (root-> next[i]) {
92 que.push(root-> next[i]);
93 root-> next[i]-> fail = root;
94 }
95 }
96 while (!que.empty()) {
97 L *r = que.front();que.pop();
98 for (i = 0;i < 4;i++) {
99 if (r->next[i]) {
100 L *u = r-> next[i]; que.push(u);
101 L *v = r-> fail;
102 while (v != root && v-> next[i] == NULL) { v = v-> fail; }
103 if (v-> next[i]) {
104 u-> fail = v-> next[i];
105 }else {
106 u-> fail = root;
107 }
108 u->isend |= u->fail->isend;
109 }
110 }
111 }
112 }
113 }AC;
114
115 int m, n, dim;
116 char s[51];
117
118 struct Matrix {
119 LL a[100][100];
120 Matrix() { memset(a, 0, sizeof(a)); }
121 LL * operator [](int idx) {
122 return a[idx];
123 }
124 void pr()
125 {
126 int i, j;
127 for (i = 0;i < dim;i++) {
128 for (j = 0;j < dim;j++) {
129 printf("%lld ", a[i][j]);
130 }
131 putchar(10);
132 }
133 putchar(10);
134 }
135 }fac, r;
136
137 void makeMatrix()
138 {
139 int i,j;
140 queue<L*> que;
141 que.push(AC.root);
142 while (!que.empty()) {
143 int sz = que.size();
144 while (sz--) {
145 L* r = que.front();que.pop();
146 int u = r->stat;
147 int cnt = 0;
148 for (i = 0;i < 4;i++) {
149 if (r->next[i]) {
150 if (!r->next[i]->isend) {
151 int v = r->next[i]->stat;
152 fac[v][u] ++;
153 que.push(r->next[i]);
154 }
155 }else {
156 L * cur = r->fail;
157 while (cur != AC.root && cur->next[i] == NULL) { cur = cur->fail; }
158 if (cur->next[i]) {
159 cur = cur->next[i];
160 }
161 if (!cur->isend) {
162 int v = cur->stat;
163 fac[v][u] ++;
164 }
165 }
166 }
167 }
168 }
169 dim = top;
170 for (i = 0;i < dim;i++) {
171 for (j = 0;j < dim;j++) {
172 fac[i][j] %= MOD;
173 }
174 }
175 }
176
177 Matrix mul(Matrix a, Matrix b)
178 {
179 int i,j,k;
180 Matrix r;
181 for (i = 0;i < dim;i++) {
182 for (j = 0;j < dim;j++) {
183 for (k = 0;k < dim;k++) {
184 r[i][j] = (r[i][j] + (a[i][k] * b[k][j]) % MOD ) % MOD;
185 }
186 }
187 }
188 return r;
189 }
190
191 Matrix pow(const Matrix &a, int times)
192 {
193 if (times == 1) {
194 return a;
195 }
196 Matrix r = pow(a, times >> 1);
197 r = mul(r, r);
198 if (times & 1) {
199 r = mul(r, a);
200 }
201 return r;
202 }
203
204 int main()
205 {
206 int i, j, k;
207 scanf("%d %d", &m, &n);
208 while (m--) {
209 scanf("%s", s);
210 AC.ins(s);
211 }
212 AC.finish();
213 if (Debug) {
214 AC.pr();
215 }
216 makeMatrix();
217 if (Debug) {
218 fac.pr();
219 }
220 Matrix r = pow(fac, n);
221
222 int ans = 0;
223 for (i = 0;i < dim;i++) {
224 ans = (ans + r[i][0]) % MOD;
225 }
226 printf("%d\n", ans);
227 return 0;
228 }
229
pku3691
1 const int N = 1024;
2 struct L {
3 int stat;
4 int c, isend;
5 L* fail;
6 L* next[4];
7 L() { stat = 0, c = 0, fail = NULL, memset(next, 0, sizeof(next));}
8 }pool[N], *root;
9 const int inf = 0x7f7f7f7f;
10 int top;
11 int dp[1024][1024];
12 char s[2048];
13 int n, len;
14
15 int c2i(char t)
16 {
17 if (t == 'A') { return 0; }
18 if (t == 'C') { return 1; }
19 if (t == 'G') { return 2; }
20 if (t == 'T') { return 3; }
21 return -1;
22 }
23
24 void init()
25 {
26 top = 0;
27 memset(pool, 0, sizeof(pool));
28 memset(dp, 0x7f, sizeof(dp));
29 dp[0][0] = 0;
30 root = &pool[top++];
31 }
32
33 void ins(char s[])
34 {
35 L* r = root;
36 for (char * t = s;*t;t++) {
37 int idx = c2i(*t);
38 if (r->next[idx] == NULL) {
39 r->next[idx] = &pool[top];
40 r->next[idx]->stat = top++;
41 }
42 r = r->next[idx];
43 }
44 r->isend = true;
45 }
46
47 void finish()
48 {
49 int i;
50 queue<L*> que;
51 root->fail = root;
52 for (i = 0;i < 4;i++) {
53 if (root->next[i]) {
54 que.push(root->next[i]);
55 root->next[i]->fail = root;
56 }
57 }
58 while (!que.empty()) {
59 L* r = que.front();que.pop();
60 for (i = 0;i < 4;i++) if (r->next[i]) {
61 L *u = r->next[i]; que.push(r->next[i]);
62 L *v = r->fail;
63 while (v != root && v->next[i] == NULL) { v = v->fail; }
64 if (v->next[i]) { v = v->next[i]; }
65 u->fail = v;
66 u->isend |= u->fail->isend;
67 }
68 }
69 }
70
71 void pr(L *root)
72 {
73 int i,j,k;
74 queue<L*> que;
75 que.push(root);
76 while (!que.empty()) {
77 int sz = que.size();
78 while (sz--) {
79 L* r = que.front();que.pop();
80 if (r == NULL) {
81 printf("* ");
82 continue;
83 }
84 printf("[%d,%d] ", r->stat, r->fail->stat);
85 for (i = 0;i < 4;i++) {
86 if (r->next[i]) {
87 que.push(r->next[i]);
88 }
89 }
90 que.push(NULL);
91 }
92 putchar(10);
93 }
94 for (i = 0;i < top;i++) {
95 printf("[%d, %d] ", pool[i].stat, pool[i].fail->stat);
96 }
97 putchar(10);
98 }
99
100 int main()
101 {
102 L* r;
103 int i,j,k, stat, testid = 1;
104 while(scanf("%d", &n) == 1 && n) {
105 init();
106 for (i = 0;i < n;i++) {
107 scanf("%s", s);
108 ins(s);
109 }
110 finish();
111 if (Debug) {
112 pr(root);
113 }
114 scanf("%s", s + 1);
115 len = strlen(s + 1);
116 for (i = 1;i <= len;i++) {
117 for (j = 0;j < top;j++) {
118 for (k = 0;k < 4;k++) {
119 if (dp[i-1][j] < inf && !pool[j].isend) {
120 int cost = (k != c2i(s[i]));
121 if (pool[j].next[k]) {
122 r = pool[j].next[k];
123 }else {
124 r = pool[j].fail;
125 while (r != root && r->next[k] == NULL) {
126 r = r->fail;
127 }
128 if (r->next[k]) {
129 r = r->next[k];
130 }
131 }
132 if (!r->isend) {
133 stat = r->stat;
134 ckmin(dp[i][stat], dp[i-1][j] + cost);
135 }
136 }
137 }
138 }
139 }
140 if (Debug) {
141 printf("info\n");
142 for (i = 0 ;i <= len;i++) {
143 for (j = 0;j < top;j++) {
144 printf("%12d", dp[i][j]);
145 }
146 putchar(10);
147 }
148 putchar(10);
149 }
150 int ans = inf;
151 for (i = 0;i < top;i++) {
152 ckmin(ans, dp[len][i]);
153 }
154 if (ans == inf) {
155 ans = -1;
156 }
157 printf("Case %d: %d\n", testid++, ans);
158 }
159
160 return 0;
161 }
162
163
1 const int N = 1024;
2 struct L {
3 int stat;
4 int c, isend;
5 L* fail;
6 L* next[4];
7 L() { stat = 0, c = 0, fail = NULL, memset(next, 0, sizeof(next));}
8 }pool[N], *root;
9 const int inf = 0x7f7f7f7f;
10 int top;
11 int dp[1024][1024];
12 char s[2048];
13 int n, len;
14
15 int c2i(char t)
16 {
17 if (t == 'A') { return 0; }
18 if (t == 'C') { return 1; }
19 if (t == 'G') { return 2; }
20 if (t == 'T') { return 3; }
21 return -1;
22 }
23
24 void init()
25 {
26 top = 0;
27 memset(pool, 0, sizeof(pool));
28 memset(dp, 0x7f, sizeof(dp));
29 dp[0][0] = 0;
30 root = &pool[top++];
31 }
32
33 void ins(char s[])
34 {
35 L* r = root;
36 for (char * t = s;*t;t++) {
37 int idx = c2i(*t);
38 if (r->next[idx] == NULL) {
39 r->next[idx] = &pool[top];
40 r->next[idx]->stat = top++;
41 }
42 r = r->next[idx];
43 }
44 r->isend = true;
45 }
46
47 void finish()
48 {
49 int i;
50 queue<L*> que;
51 root->fail = root;
52 for (i = 0;i < 4;i++) {
53 if (root->next[i]) {
54 que.push(root->next[i]);
55 root->next[i]->fail = root;
56 }
57 }
58 while (!que.empty()) {
59 L* r = que.front();que.pop();
60 for (i = 0;i < 4;i++) if (r->next[i]) {
61 L *u = r->next[i]; que.push(r->next[i]);
62 L *v = r->fail;
63 while (v != root && v->next[i] == NULL) { v = v->fail; }
64 if (v->next[i]) { v = v->next[i]; }
65 u->fail = v;
66 u->isend |= u->fail->isend;
67 }
68 }
69 }
70
71 void pr(L *root)
72 {
73 int i,j,k;
74 queue<L*> que;
75 que.push(root);
76 while (!que.empty()) {
77 int sz = que.size();
78 while (sz--) {
79 L* r = que.front();que.pop();
80 if (r == NULL) {
81 printf("* ");
82 continue;
83 }
84 printf("[%d,%d] ", r->stat, r->fail->stat);
85 for (i = 0;i < 4;i++) {
86 if (r->next[i]) {
87 que.push(r->next[i]);
88 }
89 }
90 que.push(NULL);
91 }
92 putchar(10);
93 }
94 for (i = 0;i < top;i++) {
95 printf("[%d, %d] ", pool[i].stat, pool[i].fail->stat);
96 }
97 putchar(10);
98 }
99
100 int main()
101 {
102 L* r;
103 int i,j,k, stat, testid = 1;
104 while(scanf("%d", &n) == 1 && n) {
105 init();
106 for (i = 0;i < n;i++) {
107 scanf("%s", s);
108 ins(s);
109 }
110 finish();
111 if (Debug) {
112 pr(root);
113 }
114 scanf("%s", s + 1);
115 len = strlen(s + 1);
116 for (i = 1;i <= len;i++) {
117 for (j = 0;j < top;j++) {
118 for (k = 0;k < 4;k++) {
119 if (dp[i-1][j] < inf && !pool[j].isend) {
120 int cost = (k != c2i(s[i]));
121 if (pool[j].next[k]) {
122 r = pool[j].next[k];
123 }else {
124 r = pool[j].fail;
125 while (r != root && r->next[k] == NULL) {
126 r = r->fail;
127 }
128 if (r->next[k]) {
129 r = r->next[k];
130 }
131 }
132 if (!r->isend) {
133 stat = r->stat;
134 ckmin(dp[i][stat], dp[i-1][j] + cost);
135 }
136 }
137 }
138 }
139 }
140 if (Debug) {
141 printf("info\n");
142 for (i = 0 ;i <= len;i++) {
143 for (j = 0;j < top;j++) {
144 printf("%12d", dp[i][j]);
145 }
146 putchar(10);
147 }
148 putchar(10);
149 }
150 int ans = inf;
151 for (i = 0;i < top;i++) {
152 ckmin(ans, dp[len][i]);
153 }
154 if (ans == inf) {
155 ans = -1;
156 }
157 printf("Case %d: %d\n", testid++, ans);
158 }
159
160 return 0;
161 }
162
163