hdoj 2457 DNA repair 【AC自动机 + dp】



DNA repair

Time Limit: 5000/2000 MS (Java/Others)    Memory Limit: 32768/32768 K (Java/Others)
Total Submission(s): 1747    Accepted Submission(s): 932


Problem Description
Biologists finally invent techniques of repairing DNA that contains segments causing kinds of inherited diseases. For the sake of simplicity, a DNA is represented as a string containing characters 'A', 'G' , 'C' and 'T'. The repairing techniques are simply to change some characters to eliminate all segments causing diseases. For example, we can repair a DNA "AAGCAG" to "AGGCAC" to eliminate the initial causing disease segments "AAG", "AGC" and "CAG" by changing two characters. Note that the repaired DNA can still contain only characters 'A', 'G', 'C' and 'T'.

You are to help the biologists to repair a DNA by changing least number of characters.
 

Input
The input consists of multiple test cases. Each test case starts with a line containing one integer N (1 ≤ N ≤ 50), which is the number of DNA segments causing inherited diseases.
The following N lines give N non-empty strings of length not greater than 20 containing only characters in "AGCT", which are the DNA segments causing inherited diseases.
The last line of the test case is a non-empty string of length not greater than 1000 containing only characters in "AGCT", which is the DNA to be repaired.

The last test case is followed by a line containing a single zero.
 

Output
For each test case, print a line containing the test case number( beginning with 1) followed by the
number of characters which need to be changed. If it's impossible to repair the given DNA, print -1.
 

Sample Input
       
       
       
       
2
AAA
AAG
AAAG
2
A
TG
TGAATG
4
A
G
C
T
AGT
0
 

Sample Output
       
       
       
       
Case 1: 1
Case 2: 4
Case 3: -1
 



题意:给n个DNA序列,代表患病的基因。又给出一个人的DNA的序列,问你最少改动几个元素使得该人不患病,若无法达到目的输出-1。


思路:构建Trie状态转移图,并标记危险节点,当前节点now的失配指针指向危险节点,则now节点也是危险节点。

用dp[i][j]表示在不经过危险节点的前提下,从root出发、处理完文本串的前i个字符后到达节点j时,最少需要修改的字符个数。

那么对于当前节点j,需要遍历它的4个孩子节点。

对每个孩子节点nextnode的处理——

1,危险节点跳过;

2,没有沿着文本串走即nextnode的字母与当前文本串的字母不同,need = dp[i][j] + 1;

3,沿着文本串走,need = dp[i][j],不需要额外的步数花费。

dp[i+1][nextnode] = min(dp[i+1][nextnode], need);



AC代码:



#include <cstdio>
#include <cstring>
#include <queue>
#include <algorithm>
#define MAXN 1005
#define INF 0x3f3f3f3f
using namespace std;
// Running test-case number for the "Case %d:" output line. Trie::solve prints
// it and then increments it, so the first case printed is numbered 1.
int p = 1;
/**
 * Aho-Corasick automaton over the DNA alphabet {A, T, G, C}, plus the DP that
 * finds the fewest character changes needed so that a text contains none of
 * the inserted segments.
 *
 * Call order for one test case: init(), Insert() once per forbidden segment,
 * Build(), then solve().  Insert() must not be called after Build(), because
 * Build() overwrites the -1 "no child" markers that Insert() relies on.
 *
 * Capacity is fixed by MAXN and is not checked: the total number of trie
 * nodes must stay below MAXN, and the text given to solve() must be shorter
 * than MAXN characters.
 */
struct Trie
{
    // next[u][c]: child of u on letter c (-1 = none) while inserting; after
    //             Build() it is the full automaton transition.
    // fail[u]   : failure link of u.
    // End[u]    : 1 if reaching u means some forbidden segment has just been
    //             matched ("dangerous" node), 0 otherwise.
    int next[MAXN][4], fail[MAXN], End[MAXN];
    // L is the number of nodes allocated so far; root is the root's index.
    int L, root;

    /** Allocates a fresh node with no children and End == 0; returns its index. */
    int newnode()
    {
        for(int i = 0; i < 4; i++)
            next[L][i] = -1;
        End[L++] = 0;
        return L-1;
    }

    /** Resets the automaton to a single root node. */
    void init()
    {
        L = 0;
        root = newnode();
    }

    /**
     * Maps a nucleotide letter to its column in next[][].
     * Returns 0..3 for 'A', 'T', 'G', 'C' and -1 for any other character
     * (the function previously ran off its end without returning in that case).
     */
    int getval(char c)
    {
        switch(c)
        {
            case 'A': return 0;
            case 'T': return 1;
            case 'G': return 2;
            case 'C': return 3;
            default:  return -1;
        }
    }

    /**
     * Adds the forbidden segment s (NUL-terminated) to the trie and marks its
     * last node as dangerous.
     *
     * A segment containing a character outside A/T/G/C is ignored: a repaired
     * strand consists only of those four letters, so such a segment can never
     * occur in it, and the character has no column in next[][].
     */
    void Insert(char *s)
    {
        for(int i = 0; s[i]; i++)
            if(getval(s[i]) < 0)
                return;

        int now = root;
        for(int i = 0; s[i]; i++)
        {
            int c = getval(s[i]);
            if(next[now][c] == -1)
                next[now][c] = newnode();
            now = next[now][c];
        }
        End[now] = 1;
    }

    /**
     * Computes failure links with a BFS from the root, fills every missing
     * edge so that next[][] becomes a complete transition table, and
     * propagates the dangerous flag along failure links.
     */
    void Build()
    {
        std::queue<int> Q;
        fail[root] = root;
        for(int i = 0; i < 4; i++)
        {
            if(next[root][i] == -1)
                next[root][i] = root;          // missing edge at the root loops back to it
            else
            {
                fail[next[root][i]] = root;
                Q.push(next[root][i]);
            }
        }
        while(!Q.empty())
        {
            int now = Q.front();
            Q.pop();
            // A node whose failure target is dangerous is dangerous too: a
            // forbidden segment ends here as a proper suffix.  The failure
            // target is strictly shallower than now, so in BFS order its
            // flag is already final.
            if(End[fail[now]])
                End[now] = 1;
            for(int i = 0; i < 4; i++)
            {
                if(next[now][i] == -1)
                    next[now][i] = next[fail[now]][i];   // borrow the transition of the failure node
                else
                {
                    fail[next[now][i]] = next[fail[now]][i];
                    Q.push(next[now][i]);
                }
            }
        }
    }

    // dp[i][j]: fewest changed characters among the first i characters of the
    // text such that the automaton, started at the root and never entering a
    // dangerous node, is in state j afterwards.  INF means "unreachable".
    int dp[MAXN][MAXN];

    /**
     * Prints "Case <p>: <answer>" for the text s, where <answer> is the
     * minimum number of characters of s that must be changed so that no
     * forbidden segment occurs, or -1 if that is impossible.  Uses and
     * post-increments the file-level case counter p.
     *
     * Build() must have been called first.  strlen(s) must be less than MAXN
     * because row strlen(s) of dp is written.  A character of s outside
     * A/T/G/C matches none of the four letters, so it always costs one change.
     */
    void solve(char *s)
    {
        int n = static_cast<int>(strlen(s));
        // Only rows 0..n are ever read, so only those are cleared.  memset
        // writes the low byte of INF (0x3f) into every byte, which yields
        // exactly INF (0x3f3f3f3f) in each int.
        memset(dp, INF, sizeof(dp[0]) * (n + 1));
        dp[0][root] = 0;
        for(int i = 0; i < n; i++)
        {
            int keep = getval(s[i]);           // letter that costs nothing at position i
            for(int j = 0; j < L; j++)
            {
                if(dp[i][j] == INF || End[j]) continue;
                for(int k = 0; k < 4; k++)
                {
                    int nextnode = next[j][k];
                    if(End[nextnode]) continue; // never step onto a dangerous node
                    // Keeping the original letter is free; any other letter is one change.
                    int need = (k == keep) ? dp[i][j] : dp[i][j] + 1;
                    dp[i+1][nextnode] = std::min(dp[i+1][nextnode], need);
                }
            }
        }
        int ans = INF;
        for(int i = 0; i < L; i++)
            ans = std::min(dp[n][i], ans);
        printf("Case %d: %d\n", p++, ans == INF ? -1 : ans);
    }
};
// The automaton lives at namespace scope: Trie embeds a MAXN x MAXN int
// table, which this keeps off the stack.
Trie ac;
// Input buffer reused for every forbidden segment and for the DNA text.
char str[MAXN];

/**
 * Reads test cases until the terminating 0 (or end of input) and prints one
 * "Case k: answer" line per case via Trie::solve.
 *
 * Each case is: N, then N forbidden segments, then the DNA text.
 */
int main()
{
    int n;
    // Checking scanf's result stops the loop on EOF or malformed input;
    // otherwise n would be read uninitialised or keep its previous non-zero
    // value and the loop would never end.
    while(scanf("%d", &n) == 1 && n)
    {
        ac.init();
        for(int i = 0; i < n; i++)
        {
            // Field width 1004 == MAXN - 1 keeps the read inside str[MAXN].
            if(scanf("%1004s", str) != 1)
                return 0;
            ac.Insert(str);
        }
        ac.Build();
        if(scanf("%1004s", str) != 1)
            return 0;
        ac.solve(str);
    }
    return 0;
}


你可能感兴趣的:(hdoj 2457 DNA repair 【AC自动机 + dp】)