题目大意:
给你N个致病基因(长度各不超过20), 再给你一条DNA链的碱基序列(长度M不超过1000), 要求修改尽量少的碱基, 使得序列中不含任何致病基因; 若无法做到则输出-1.
简要分析:
这是赤裸裸的多模式串匹配, 当然要建立AC自动机了. 把一些状态标成致病态(当然要把fail指针所指状态的信息合并过来). 设f[i][s]表示处理到DNA的第i个碱基, 在自动机上走到s状态时, 最少的修改次数. 转移时枚举AGCT四种碱基即可, 注意不能走到致病状态. 于是答案为min{f[M][s]}, s为非致病状态. 还有一类类似的问题, M奇大无比而状态数又不多, 敏锐地想到用矩阵乘法就行了.
代码实现:
1 #include <cstdio>
2 #include <cstdlib>
3 #include <cstring>
4 #include <queue>
5 #include <algorithm>
6 using namespace std;
7
// Capacity constants used to size the buffers and the node pool below.
const int MAX_N = 50;    // maximum number of patterns
const int MAX_P = 20;    // maximum pattern length
const int MAX_W = 1000;  // maximum text length
const int SON = 4;       // number of outgoing transitions per automaton node
// "Unreachable" DP cost; equals the int produced by memset(..., 0x3f, ...).
const int INF = 0x3f3f3f3f;

// Input buffers: t receives one pattern at a time, s receives the text.
char t[MAX_P + 1];
char s[MAX_W + 1];

// n is the pattern count of the current test case; idx maps a character code
// to its transition index.
int n;
int idx[256];

// Two rolling DP rows indexed by automaton node number; now / pre select the
// row being written and the row being read.
int f[2][MAX_N * MAX_P + 1];
int now;
int pre;
12
13 struct node_t {
14 node_t *son[SON], *fail;
15 bool v;
16 } node_pool[MAX_N * MAX_P + 1], *node_idx, *root;
17
18 node_t *node_alloc() {
19 node_t *ret = node_idx ++;
20 memset(ret -> son, 0, sizeof(ret -> son));
21 ret -> fail = NULL;
22 ret -> v = 0;
23 return ret;
24 }
25
26 void init() {
27 node_idx = node_pool;
28 root = node_alloc();
29 }
30
31 void ins(char *str) {
32 node_t *pos = root;
33 while (*str) {
34 int p = idx[*(str ++)];
35 if (!pos -> son[p]) pos -> son[p] = node_alloc();
36 pos = pos -> son[p];
37 }
38 pos -> v = 1;
39 }
40
41 void build() {
42 static queue <node_t *> q;
43 for (int i = 0; i < SON; i ++)
44 if (root -> son[i]) {
45 root -> son[i] -> fail = root;
46 q.push(root -> son[i]);
47 }
48 else root -> son[i] = root;
49 while (q.size()) {
50 node_t *u = q.front();
51 q.pop();
52 for (int i = 0; i < SON; i ++)
53 if (u -> son[i]) {
54 u -> son[i] -> fail = u -> fail -> son[i];
55 u -> son[i] -> v |= u -> fail -> son[i] -> v;
56 q.push(u -> son[i]);
57 }
58 else u -> son[i] = u -> fail -> son[i];
59 }
60 }
61
62 int main() {
63 idx['A'] = 0, idx['G'] = 1, idx['C'] = 2, idx['T'] = 3;
64 int cas = 0;
65 while (scanf("%d", &n) != EOF && n) {
66 init();
67 for (int i = 0; i < n; i ++) {
68 scanf("%s", t);
69 ins(t);
70 }
71 build();
72 scanf("%s", s);
73 int sz = strlen(s);
74 int cnt = node_idx - node_pool;
75
76 now = 0, pre = 1;
77 memset(f[now], 0x3f, sizeof(f[now]));
78 f[now][0] = 0;
79 for (int i = 0; i < sz; i ++) {
80 now ^= 1, pre ^= 1;
81 memset(f[now], 0x3f, sizeof(f[now]));
82 for (int j = 0; j < cnt; j ++)
83 if (f[pre][j] < INF) {
84 node_t *pos = node_pool + j;
85 for (int k = 0; k < 4; k ++)
86 if (!pos -> son[k] -> v) {
87 int t = pos -> son[k] - node_pool;
88 f[now][t] = min(f[now][t], f[pre][j] + (k != idx[s[i]]));
89 }
90 }
91 }
92
93 printf("Case %d: ", ++ cas);
94 int ans = INF;
95 for (int i = 0; i < cnt; i ++)
96 if (!(node_pool + i) -> v) ans = min(ans, f[now][i]);
97 if (ans == INF) printf("-1\n");
98 else printf("%d\n", ans);
99 }
100 return 0;
101 }