[AC自动机+矩阵快速幂] 求长度为m的不包含某些给定子串的字符串数量 POJ2778

DNA Sequence

Time Limit: 1000MS   Memory Limit: 65536K
Total Submissions: 19748   Accepted: 7525

Description

It is well known that a DNA sequence consists only of the characters A, C, T and G, and that it is very useful to analyze segments of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is to count how many DNA sequences of a species do not contain any of those segments.

Suppose that a DNA sequence of a species is a sequence consisting of A, C, T and G, and that the length of the sequence is a given integer n.

Input

The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.

Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10. 

Output

An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36
 长度为 m 的合法字符串数量 = 在 AC 自动机上从根节点出发、每一步都不进入被禁止节点、恰好走 m 步的路径总数。
 设矩阵 M[i][j] 为从节点 i 走 1 步到达节点 j 的方案数(任一端节点被禁止时为 0),则 M 的 m 次方的 [i][j] 项就是从节点 i 走 m 步到达节点 j 的方案数。
 答案 = M^m 第 0 行(根节点所在行)所有元素之和,对 100000 取模。

#include <cstdio>
#include <cstring>
#include <queue>
using namespace std;

/// All counts are reported modulo this value.
const int mod = 100000;
/// mn * mk (= 121) is the capacity of every per-node array below.
/// NOTE(review): presumably "max patterns + 1" and "max pattern length + 1";
/// 10 patterns of length 10 need at most 101 nodes including the root.
const int mn = 11, mk = 11;

/// Maps a character code to its alphabet index 0..3 (filled in main()).
int dy[130];
/// One-step transition-count matrix between automaton nodes (see build_matrix()).
int mat[mn * mk][mn * mk];
/// Working matrices of qpow(): `base` holds the repeatedly squared matrix,
/// `ans` receives the final power.
int base[mn * mk][mn * mk], ans[mn * mk][mn * mk];

/// Index of the most recently created node; node 0 is the root.
int num;
/// nx[v][c]: node reached from v on letter index c; -1 while no such edge exists.
int nx[mn * mk][4];
/// ed[v]: non-zero when node v is forbidden; fail[v]: failure link of node v.
int ed[mn * mk], fail[mn * mk];
void init()
{
	num = 0;
	memset(nx, -1, sizeof nx);
	memset(ed, 0, sizeof ed);
	memset(fail, 0, sizeof fail);
}
/// Inserts the NUL-terminated pattern `ch` into the trie and marks the node
/// where it ends as forbidden. Characters are translated through `dy`.
void add(char ch[])
{
	int cur = 0; // start at the root
	for (const char *p = ch; *p != '\0'; ++p)
	{
		int &child = nx[cur][dy[static_cast<int>(*p)]];
		if (child == -1)
			child = ++num; // allocate a fresh node for this edge
		cur = child;
	}
	ed[cur] = 1;
}
/// Turns the trie into a complete automaton by a breadth-first traversal:
///  - computes the failure link `fail[v]` of every node,
///  - replaces every missing edge (-1) by the edge of the failure node, so
///    that nx[v][c] is a valid node for every v and c afterwards,
///  - marks a node as forbidden when the node its failure link points to is
///    forbidden (a forbidden pattern is then a suffix of the node's string).
void build_Fail()
{
	std::queue<int> que; // nodes whose children still have to be processed
	const int root = 0;
	fail[root] = root;
	for (int i = 0; i < 4; i++)
	{
		if (nx[root][i] == -1)
			nx[root][i] = root; // no edge from the root: stay at the root
		else
		{
			fail[nx[root][i]] = root;
			que.push(nx[root][i]);
		}
	}
	while (!que.empty())
	{
		const int r = que.front();
		que.pop();

		// fail[r] is shallower than r, so its flag is already final here.
		if (ed[fail[r]])
			ed[r] = 1;

		for (int i = 0; i < 4; i++)
		{
			if (nx[r][i] == -1)
				nx[r][i] = nx[fail[r]][i]; // borrow the transition of the failure node
			else
			{
				fail[nx[r][i]] = nx[fail[r]][i];
				que.push(nx[r][i]);
			}
		}
	}
}

/// Fills `mat` so that mat[u][v] is the number of letters that move the
/// automaton from node u to node v in one step, counting only moves whose
/// source and target are both allowed (not forbidden) nodes.
void build_matrix()
{
    memset(mat, 0, sizeof(mat));
    for (int from = 0; from <= num; ++from)
    {
        if (ed[from])
            continue; // a forbidden node has no outgoing moves
        for (int letter = 0; letter < 4; ++letter)
        {
            const int to = nx[from][letter];
            if (ed[to])
                continue; // never step into a forbidden node
            ++mat[from][to];
        }
    }
}

/// Computes c = a * b (mod `mod`) on the leading (num + 1) x (num + 1)
/// sub-matrices. The product is built in a scratch buffer first, so `c` may
/// be the same array as `a` and/or `b`.
void mul_mat(int a[][mn * mk], int b[][mn * mk], int c[][mn * mk])
{
    const int size = num + 1;
    int product[mn * mk][mn * mk];

    for (int row = 0; row < size; ++row)
    {
        for (int col = 0; col < size; ++col)
        {
            long long acc = 0;
            for (int mid = 0; mid < size; ++mid)
            {
                acc += static_cast<long long>(a[row][mid]) * b[mid][col];
                acc %= mod;
            }
            product[row][col] = static_cast<int>(acc);
        }
    }

    for (int row = 0; row < size; ++row)
        for (int col = 0; col < size; ++col)
            c[row][col] = product[row][col];
}
/// Raises the transition matrix `mat` to the m-th power (mod `mod`) by binary
/// exponentiation and leaves the result in the global `ans`; the global
/// `base` is used as scratch space and is overwritten.
///
/// @param m exponent. For m <= 0 the result is the identity matrix (mat^0);
///          without this guard a non-positive exponent would never reach 0
///          under `m >>= 1` and the loop would not terminate.
void qpow(int m)
{
    for (int i = 0; i <= num; i++)
    {
        for (int j = 0; j <= num; j++)
            base[i][j] = ans[i][j] = mat[i][j];
    }

    if (m <= 0)
    {
        for (int i = 0; i <= num; i++)
        {
            for (int j = 0; j <= num; j++)
                ans[i][j] = (i == j) ? 1 : 0;
        }
        return;
    }

    m--; // ans already holds the first power
    while (m)
    {
        if (m & 1)
            mul_mat(ans, base, ans);
        mul_mat(base, base, base);
        m >>= 1;
    }
}

/// Reads test cases until the input ends. Each case consists of the number
/// of forbidden segments and the sequence length, followed by the segments;
/// for each case prints the number of sequences of that length containing no
/// forbidden segment, modulo `mod`.
///
/// Note: the local names are swapped relative to the problem statement:
/// here `n` is the number of segments and `m` is the sequence length.
int main()
{
    // letter -> alphabet index used by the automaton
    dy['A'] = 0;
    dy['C'] = 1;
    dy['T'] = 2;
    dy['G'] = 3;

    int n, m;
    // Require both numbers: `~scanf(...)` would also accept a partial match.
    while (scanf("%d %d", &n, &m) == 2)
    {
        init();
        while (n-- > 0)
        {
            char ch[15];
            // Width-limited read so an over-long token cannot overflow `ch`.
            if (scanf("%14s", ch) != 1)
                return 0; // truncated input: nothing sensible to compute
            add(ch);
        }
        build_Fail(); // complete the automaton (failure links, forbidden marks)

        build_matrix(); // one-step transition matrix
        qpow(m);        // ans = mat^m

        int res = 0;
        for (int i = 0; i <= num; i++) // sum over all end nodes, starting from the root
            res = (res + ans[0][i]) % mod;
        printf("%d\n", res);
    }
    return 0;
}

 

 

你可能感兴趣的:(模板,常用技巧,字符串,AC自动机)