[POJ 2778] DNA Sequence (AC自动机+DP+矩阵加速)

Description
It's well known that DNA Sequence is a sequence only contains A, C, T and G, and it's very useful to analyze a segment of DNA Sequence,For example, if a animal's DNA sequence contains segment ATC then it may mean that the animal may have a genetic disease. Until now scientists have found several those segments, the problem is how many kinds of DNA sequences of a species don't contain those segments. 

Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G,and the length of sequences is a given integer n. 


Input
First line contains two integer m (0 <= m <= 10), n (1 <= n <=2000000000). Here, m is the number of genetic disease segment, and n is the length of sequences. 
Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10. 


Output

An integer, the number of DNA sequences, mod 100000.


Sample Input
4 3
AT
AC
AG

AA


Sample Output

36


禁止字符串,求长度为n的不包含模板串的字符串有多少个。典型的AC自动机+DP的题目。dp[ i ][ np ] 表示字符串构造到第 i 位且此时trie上匹配到了第np个点。 枚举第 i+1 个字母,碰到失配的时候fail回去并转移(已优化) ,碰到结尾的时候无视之,其余转移。

由于n比较大,需要用矩阵快速幂加速DP。构造矩阵: n [nxp][np] = 1 表示有一个状态可以从 trie 上的 np 的位置转移到下一个 nxp 的位置。然后快速幂之。


本题坑点:

(1)这题在POJ上时限卡的比较紧。所以必要的时候再MOD,不然会T。(或者是我自己常数写得不够美) 


#include <cstdio>
#include <cstring>
#include <cstdlib>

using namespace std;
const int nxtsize = 4, noden = 10*10+10, MOD = 100000;
int chn(char a)
{
	switch(a)
	{
		case 'A': return 0;
		case 'T': return 1;
		case 'C': return 2;
		case 'G': return 3;
	}
}

struct trnode
{
	char chr;
	int nxt[4],fail,val;
};

struct Matrix
{
	Matrix();
	Matrix(int size){memset(n, 0, sizeof(n));sz=size;}
	int sz;
	long long n[noden][noden];
	void E(){for(int i=0; i<noden; i++) n[i][i] = 1;}
	/*
	void pri()
	{
		for(int i=0; i<=sz; i++)
		{
			for(int j=0; j<=sz; j++)
				printf("%I64d ", this->n[i][j]);
			puts("");
		}
	}
	*/
};


int n,m;
int triep;
trnode trie[noden];
int que[noden];

void addstr(char*);
void calfail();
Matrix operator * (Matrix, Matrix);
Matrix mtxpow(Matrix,int);

int main()
{
	scanf("%d %d\n", &m, &n);
	for(int i=0; i<m; i++)
	{
		char patn[11];
		scanf("%s", patn);
		addstr(patn);
	}
	calfail();
	Matrix ans(triep);
	for(int np=0; np<=triep; np++)
	{
		if(trie[np].val) continue;
		for(int k=0; k<nxtsize; k++)
		{
			int nxp = trie[np].nxt[k];
			if(trie[nxp].val) continue;
			ans.n[nxp][np] ++;
		}
	}
	ans = mtxpow(ans,n);
	long long outp = 0;
	for(int i=0; i<=triep; i++)
	{
		if(trie[i].val) continue;
		outp += ans.n[i][0];
	}
	printf("%I64d\n", outp%MOD);
	return 0;
}

void addstr(char *patn)
{
	int len = strlen(patn);
	int np = 0;
	for(int i=0; i<len; i++)
	{
		int nxp = trie[np].nxt[chn(patn[i])];
		if(nxp)
			np = nxp;
		else
		{
			triep++;
			trie[np].nxt[chn(patn[i])] = triep;
			np = triep;
			trie[np].chr = patn[i];
		}
		if(i==len-1) trie[np].val = 1;
	}
	return;
}

void calfail()
{
	int qhead=0, qtail=0;
	for(int i=0; i<nxtsize; i++)
		if(trie[0].nxt[i])
			que[++qtail] = trie[0].nxt[i];
	while(qhead<qtail)
	{
		int np = que[++qhead];
		for(int i=0; i<nxtsize; i++)
		{
			int nxp = trie[np].nxt[i];
			int fap = trie[np].fail;
			if(!nxp)
			{
				trie[np].nxt[i] = trie[fap].nxt[i];
				continue;
			}
			que[++qtail] = nxp;
			trie[nxp].fail = trie[fap].nxt[i];
			fap = trie[nxp].fail;
			if(trie[fap].val) trie[nxp].val = 1;
		}
	}
	return;
}

Matrix operator * (Matrix a, Matrix b)
{
	int sz = a.sz;
	Matrix tem(sz);
	for(int i=0; i<=sz; i++)
		for(int j=0; j<=sz; j++)
			for(int k=0; k<=sz; k++)
			{
				tem.n[i][j] += a.n[i][k]*b.n[k][j];
				tem.n[i][j] %= MOD;
			}
	return tem;
}

Matrix mtxpow(Matrix cal, int t)
{
	Matrix tem(cal.sz);
	tem.E();
	while(t)
    {
        if(t&1)
            tem=tem*cal;
        cal=cal*cal;
        t>>=1;
    }
	return tem;
}


你可能感兴趣的:([POJ 2778] DNA Sequence (AC自动机+DP+矩阵加速))