POJ2778 DNA Sequence [AC自动机 & 矩阵快速幂]

题意:给定m个病毒的DNA序列(由A、G、C、T组成),求长度为n且不包含任何病毒子串的DNA序列有多少个(答案对100000取模)。范围:m<=10,病毒长度<=10,n<=2×10^9。

题解:在Trie图上,如果从一个节点走到下一个节点就构成了病毒,则这条边为非法边。易知原病毒的倒数第二个字母走到最后一个字母是非法边,但非法边不局限于此:如果某个节点所代表字符串的某个后缀是病毒,走到该节点同样非法。因此需要AC自动机的fail指针:如果fail指向的节点有病毒标记,则该节点也要打上病毒标记,所有进入带标记节点的边都是非法边。然后统计任意两个节点之间合法边的数量,记录在邻接矩阵里,用快速幂求这个邻接矩阵的n次方,根节点所在行(第一行)之和就是从根节点出发走n步到达各节点的方案数之和,即为答案。

注意到由于有很多病毒序列,实际上这个矩阵里有很多元素为0。因此加上稀疏矩阵的乘法优化(遇到为0的元素直接跳过)和寻址优化(按i-k-j的顺序循环,使内层循环连续访问内存),耗时可以降到原来的约二分之一。

#include<iostream>
#include<algorithm>
#include<cstdio>
#include<cstring>
using namespace std;
// Shorthand for the 64-bit integer type used for matrix entries and the answer.
#define LL long long
// Modulus applied to every matrix entry and to the final answer.
const LL mo = 100000;
// Capacity of the automaton's per-node tables and of the BFS queue.
const int MAXS = 9999;
// Filled by main() from the first two integers of the input (m first, then n).
int n, m;

// Maps a nucleotide letter to its child-slot index:
// 'A' -> 0, 'T' -> 1, 'G' -> 2, and every other character (including 'C') -> 3.
inline int f(char a)
{
	switch (a)
	{
		case 'A': return 0;
		case 'T': return 1;
		case 'G': return 2;
		default:  return 3;
	}
}

// Fixed-capacity matrix of LL entries. The code in this file addresses
// rows and columns starting from index 1, so at most 104 of each are usable.
struct mat {
	LL a[105][105];  // entry storage
	int n;           // number of rows in use
	int m;           // number of columns in use
};
// Namespace-scope scratch matrices. ksm() accumulates its result in res;
// tmp is not touched by operator* (which accumulates into a local).
mat res, tmp;
// Matrix product A * B with every entry reduced modulo mo.
//   A : A.n x A.m matrix, indices starting at 1.
//   B : expected to be A.m x B.m, indices starting at 1.
// Returns a matrix with n = A.n and m = B.m.
// Operands are taken by const reference so no 105x105 copy is made per call,
// and the product is built in a local so an operand can never alias the output.
mat operator * (const mat &A, const mat &B)
{
	mat prod;
	memset(prod.a, 0, sizeof prod.a);
	prod.n = A.n, prod.m = B.m;
	// i-k-j order keeps the innermost loop walking rows of B and prod contiguously.
	for (int i = 1; i<=A.n; ++i)
		for (int k = 1; k<=A.m; ++k) {
			const LL aik = A.a[i][k];
			// Sparse shortcut: a zero factor adds nothing to row i of the product.
			if (!aik) continue;
			for (int j = 1; j<=B.m; ++j)
				prod.a[i][j] = (prod.a[i][j] + aik*B.a[k][j]) % mo;
		}
	return prod;
}
// Raises the square matrix A to the k-th power by binary exponentiation,
// using operator* for each multiplication.
//   A : matrix of size A.n x A.n (taken by value; it is squared in place).
//   k : exponent; k <= 0 yields the identity matrix of size A.n.
// The result is built in the global res and returned by value.
mat ksm(mat A, int k)
{
	memset(res.a, 0, sizeof res.a);
	res.n = res.m = A.n;
	for (int i = 1; i<=res.n; ++i)
		res.a[i][i] = 1;
	// "k > 0" rather than "k != 0": right-shifting a negative int would never reach 0.
	while (k > 0)
	{
		if (k&1) res = res * A;
		k >>= 1;
		// Square only while bits remain; a squaring after the last bit is never used.
		if (k) A = A*A;
	}
	return res;
}

// BFS queue used by Trie::buildac(); holds node indices.
int que[MAXS];
// Aho-Corasick automaton over a 4-letter alphabet (letters mapped by f()).
// Node 0 is a virtual node whose four children all lead to the root, node 1.
// Usage order: init(), ins() for every pattern, buildac(), then buildmat().
struct Trie
{
	// ch[p][c]: child / transition of node p on letter c (0 = absent before buildac()).
	// fail[p]:  failure link of node p.
	int ch[MAXS][4], fail[MAXS];
	// isbug[p]: true when reaching node p completes a pattern.
	bool isbug[MAXS];
	// Index of the last allocated node; nodes in use are 1..ncnt.
	int ncnt;

	// Resets the automaton to "root only".
	// All tables are cleared explicitly, so init() does not depend on the
	// object being zero-initialised and may be called again to start over.
	void init()
	{
		memset(ch, 0, sizeof ch);
		memset(fail, 0, sizeof fail);
		memset(isbug, 0, sizeof isbug);
		for (int i = 0; i<4; ++i) ch[0][i] = 1;
		ncnt = 1;
	}

	// Inserts the NUL-terminated pattern s, starting from the root (node 1),
	// and marks its final node as a pattern end.
	void ins(char *s)
	{
		int p = 1;
		for (int i = 0, c; s[i]; ++i)
		{
			c = f(s[i]);
			if (!ch[p][c]) p = ch[p][c] = ++ncnt;
			else p = ch[p][c];
		}
		isbug[p] = 1;
	}

	// Computes failure links by BFS from the root and fills every missing
	// child with the transition of the failure target, so that afterwards
	// ch[p][c] is defined for every node p in 1..ncnt and every letter c.
	void buildac()
	{
		int l = 0, r = 0, t, i;
		// The root fails to the virtual node 0, whose children are all the root.
		fail[que[l] = 1] = 0;
		while (l <= r)
		{
			t = que[l ++];
			// A node whose failure target ends a pattern also ends one (as a suffix).
			// The failure target is shallower than t, so BFS has already finalised it.
			if (isbug[fail[t]]) isbug[t] = 1;
			for (i = 0; i < 4; ++i)
				if (ch[t][i])
				{
					fail[ch[t][i]] = ch[fail[t]][i];
					que[++r] = ch[t][i];
				}
				else ch[t][i] = ch[fail[t]][i];
		}
	}

	// Fills A with the ncnt x ncnt transition-count matrix:
	// A.a[i][j] = number of letters taking node i to node j, counting only
	// transitions where neither endpoint is marked in isbug.
	// ncnt must fit in A's storage (indices 1..ncnt are written).
	void buildmat(mat&A)
	{
		memset(A.a, 0, sizeof A.a);
		A.n = A.m = ncnt;
		for (int i = 1; i<=A.n; ++i)
			for (int j = 0; j<4; ++j)
				if (!isbug[i] && !isbug[ch[i][j]])
					A.a[i][ch[i][j]]++;
	}
} ac;

char bug[999];
mat X;
int main()
{
	scanf("%d%d", &m, &n);
	ac.init();
	for (int i = 1; i<=m; ++i)
		scanf("%s", bug), ac.ins(bug);
	ac.buildac();
	ac.buildmat(X);
	X = ksm(X, n);
	LL ans = 0;
	for (int i = 1; i <= X.n; ++i)
		ans = (ans + X.a[1][i]) % mo;
	cout << ans << '\n';
	return 0;
}


你可能感兴趣的:(POJ2778 DNA Sequence [AC自动机 & 矩阵快速幂])