pku 3691 DNA repair AC自动机——DP

http://poj.org/problem?id=3691

题意:

给出n个DNA病毒串,然后给出一个需要修改的DNA片段,问需要最少修改多少个字符才能是该DNA片段不含DNA病毒串,修改后的DNA片段长度不变

思路:
这题看了一天了,DP那地方好难懂。首先这里是多串匹配,我们用Trie树和fail的构造确定性有限状态自动机(DFA),然后再DFA上进行DP;

这里我DP理解了很长时间,dp[i][j]表示主串匹配到了第i个位置,然后到达的是AC自动机上的j状态时修改字符的个数,我们保证j状态不是模式串(DNA病毒串)的结束节点,然后不断地往后走选出一条匹配完主串,并且修改字符串数最少的的一条。

dp[i + 1][H[j].next[k]->id] = min(dp[i + 1][H[j].next[k]->id],dp[i][j] + add);

//#pragma comment(linker,"/STACK:327680000,327680000")

#include <iostream>

#include <cstdio>

#include <cmath>

#include <vector>

#include <cstring>

#include <algorithm>

#include <string>

#include <set>

#include <functional>

#include <numeric>

#include <sstream>

#include <stack>

#include <map>

#include <queue>



#define CL(arr, val)    memset(arr, val, sizeof(arr))



#define lc l,m,rt<<1

#define rc m + 1,r,rt<<1|1

#define pi acos(-1.0)

#define ll long long

#define L(x)    (x) << 1

#define R(x)    (x) << 1 | 1

#define MID(l, r)   (l + r) >> 1

#define Min(x, y)   (x) < (y) ? (x) : (y)

#define Max(x, y)   (x) < (y) ? (y) : (x)

#define E(x)        (1 << (x))

#define iabs(x)     (x) < 0 ? -(x) : (x)

#define OUT(x)  printf("%I64d\n", x)

#define lowbit(x)   (x)&(-x)

#define Read()  freopen("din.txt", "r", stdin)

#define Write() freopen("dout.txt", "w", stdout);





#define M 23

#define N 1000007

using namespace std;



const int inf = 0x7f7f7f7f;

struct node

{

    node *next[4];

    node *fail;

    int cnt;

    int id;

    void newnode()

    {

        fail = NULL;

        cnt = 0;

        for (int i= 0; i < 4; ++i)

        {

            next[i] = NULL;

        }

    }

};



class Ac_automat

{

public:

    node *root,*q[N],H[N];

    int fr,tl;

    int t,dp[1007][1007];

    void init()

    {

        fr = tl = 0;

        t = 0;

        H[t].newnode();

        H[t].id = t;//对Trie树上的每一个节点给一个状态号

        root = &H[t++];

    }

    int getVal(char ch)

    {

        if (ch == 'A') return 0;

        else if (ch == 'G') return 1;

        else if (ch == 'C') return 2;

        else return 3;

    }

    void insert(char *s)

    {

        int i,k;

        int len = strlen(s);

        node *p = root;

        for (i = 0; i < len; ++i)

        {

            k = getVal(s[i]);

            if (p->next[k] == NULL)

            {

                H[t].newnode();

                H[t].id = t;//给定状态的编号

                p->next[k] = &H[t++];

            }

            p = p->next[k];

        }

        p->cnt = 1;

    }

    void build()

    {

        root->fail = NULL;

        q[tl] = root;

        while (fr <= tl)

        {

            node *tmp = q[fr++];

            //这里解决出现aact  aac这种情况,只要包含aac就是非法的的有病毒的

            if (tmp != root)

            tmp->cnt = tmp->cnt||tmp->fail->cnt;



            for (int i = 0; i < 4; ++i)

            {

                //这里要更新每一个next为空的指针,好循环匹配

                if (tmp->next[i] == NULL)

                {

                    if (tmp == root) tmp->next[i] = root;

                    else tmp->next[i] = tmp->fail->next[i];

                }

                else//这里还是构造匹配失败指针

                {

                    if (tmp == root) tmp->next[i]->fail = root;

                    else

                    {

                        node *p = tmp->fail;

                        while (p != NULL)

                        {

                            if (p->next[i])

                            {

                                tmp->next[i]->fail = p->next[i];

                                break;

                            }

                            p = p->fail;

                        }

                        if (p == NULL) tmp->next[i]->fail = root;

                    }

                    q[++tl] = tmp->next[i];

                }

            }

        }

    }

    void solve(char *s,int cas)

    {

        int i,j,k;

        int len = strlen(s);

        for (i = 0; i <= len; ++i)

        {

            for (j = 0; j < t; ++j)

            {

                dp[i][j] = inf;

            }

        }



        dp[0][0] = 0;//根节点开始

        for (i = 0; i < len; ++i)//走主串

        {

            for (j = 0; j < t; ++j)//枚举每一个状态

            {

                if (dp[i][j] != inf && H[j].cnt== 0)//该状态合法

                {

                    //printf(">>>>>\n");

                    for (k = 0; k < 4; ++k)//枚举可走的边

                    {

                        if (H[j].next[k]->cnt != 0) continue;

                        int add = (k != getVal(s[i]));

                        dp[i + 1][H[j].next[k]->id] = min(dp[i + 1][H[j].next[k]->id],dp[i][j] + add);

                    }

                }

            }

        }

        int ans = -1;

        for (i = 0; i < t; ++i)

        {

           // printf("%d\n",dp[len][i]);

            if (dp[len][i] != inf && (ans == -1 || dp[len][i] < ans)) ans = dp[len][i];

        }

        printf("Case %d: %d\n",cas++,ans);

    }

}ac;



int n;

char str[N],ts[1007];



int main()

{

    int i;

    int cas = 1;

    while (~scanf("%d",&n))

    {

        if (!n) break;

        ac.init();

        for (i = 0; i < n; ++i)

        {

            scanf("%s",str);

            ac.insert(str);

        }

        ac.build();

        scanf("%s",ts);

        ac.solve(ts,cas);

        cas++;

    }

    return 0;

}

  

 

 

你可能感兴趣的:(AC自动机)