POJ2778 AC自动机经典题


DNA Sequence

Time Limit: 1000MS Memory Limit: 65536K

Description

It’s well known that a DNA sequence is a sequence that contains only A, C, T and G, and it’s very useful to analyze a segment of a DNA sequence. For example, if an animal’s DNA sequence contains the segment ATC, then it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is how many kinds of DNA sequences of a species don’t contain those segments.
Suppose that the DNA sequence of a species is a sequence that consists of A, C, T and G, and the length of the sequence is a given integer n.

Input

The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.
Each of the next m lines contains one DNA genetic disease segment; the length of each segment is not larger than 10.

Output

An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

AC自动机经典题
首先感谢右脚。
首先,对于一个自动机上节点,如果某next节点为空(失配),就连接到其fail的相应next去。这样随BFS过程,由于其fail已经连接好失配的地方,所以直接连fail的相应位置就行。
AC自动机上的边连来连去不就可以抽象成一个有向图,每走一步就相当于一个字符。
然后,求长度为L的字符串就是在这个图上走L步。不过,代表“已经包含某个致病片段”的节点不能走,所以要给这些节点打标记;本身不是某个片段结尾的节点,如果它的fail指向的节点被标记了,也要跟着标记。
怎么算走L步呢?设A为这个图的邻接矩阵(A[i][j]表示从节点i走一步到节点j的方案数),那么A的L次幂中(A^L)[i][j]就是从i恰好走L步到达j的方案数;答案就是A^L中根节点所在那一行的各项之和。
最后对邻接矩阵快速幂一发就行。
小心炸int:矩阵乘法里两个小于100000的数相乘可达约10^10,超出int范围,需要用long long。

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <queue>
using namespace std ;
// Shorthand for `long long`, the integer type used throughout this file.
#define ll long long
// maxn : capacity of the pattern buffer `pat`.
// modd : modulus applied to every matrix entry and to the final answer.
// maxN : capacity of the node table `dfn` and the side length of Matrix storage.
const ll maxn = 20, modd = 100000, maxN = 110 ;
// Holds the pattern currently being inserted; main() reads it at pat+1, so
// the characters are 1-indexed (pat[1..len]).
char pat[maxn] ;
// Maps a nucleotide letter to its child-slot index in a trie node:
// 'A' -> 0, 'G' -> 1, 'T' -> 2, and any other character -> 3.
ll to ( char c ) {
    switch ( c ) {
        case 'A' : return 0 ;
        case 'G' : return 1 ;
        case 'T' : return 2 ;
        default  : return 3 ;
    }
}
struct node {
    node* nxt[4] ;
    node* fail ;
    ll tim, id ;
    ll ch ;
    node() {
        for ( ll i = 0 ; i < 4 ; ++ i ) 
            nxt[i] = NULL ;
        fail = NULL ;
        id = tim = 0 ;
        ch = -1 ;
    }
} *h, *p, *q, *dfn[maxN] ;
struct Matrix {
    ll a[maxN][maxN], N ;
    Matrix() {
        for ( int i = 0 ; i < maxN ; ++ i )
            for ( int j = 0 ; j < maxN ; j ++ ) 
                a[i][j] = 0 ;
        N = 0 ;
    }
    friend Matrix operator * ( Matrix A, Matrix B ) {
        Matrix C ;
        C.N = A.N ;
        int i, j, k ;
        for ( i = 1 ; i <= C.N ; ++ i ) 
            for ( j = 1 ; j <= C.N ; j ++ )
                for ( k = 1 ; k <= C.N ; k ++ )
                    C.a[i][j] = ( C.a[i][j] + A.a[i][k]*B.a[k][j] ) %modd ;
        return C ;
    }
    friend Matrix operator ^ ( Matrix A, ll b ) {
        Matrix C ;
        int i, j, k ;
        C.N = A.N ;
        for ( i = 1 ; i <= C.N ; ++ i ) 
            C.a[i][i] = 1 ;
        for ( ; b ; b >>= 1, A = A*A ) 
            if ( b&1 ) C = A*C ;
        return C ;
    }
    void out() {
        int i, j, k ;
        for ( i = 1 ; i <= N ; ++ i ) 
            for ( j = 1 ; j <= N ; j ++ ) 
                printf ( "%lld%c", a[i][j], j==N?'\n':' ' ) ;
    }
} ;

// Program-wide state.
//   n   : number of forbidden patterns (the FIRST integer main() reads; the
//         problem statement calls this value m).
//   m   : length of the sequences to count (the SECOND integer main() reads;
//         the problem statement calls this value n).
//   len : length of the pattern currently stored in pat[1..len].
//   tot : number of automaton nodes created so far; also the id given to the
//         most recently created node.
ll n, m, len, tot ;

void insert() {
    p = h ;
    int i, j, k, index ;
    for ( i = 1 ; i <= len ; ++ i ) {
        index = to(pat[i]) ;
        if ( p->nxt[index] ) p = p->nxt[index] ;
        else {
            p->nxt[index] = new node ;
            p = p->nxt[index] ;
            p->id = ++tot ;
            dfn[tot] = p ;
            p->ch = index ;
        }
    }
    p->tim = 1 ;
}

queue  Q ;
void get_fail() {
    while ( !Q.empty() ) Q.pop() ;
    int i, j, index ;
    node* x ;
    Q.push(h) ;
    while ( !Q.empty() ) {
        x = Q.front() ;
        Q.pop() ;
        for ( i = 0 ; i < 4 ; ++ i ) {
            if ( x->nxt[i] ) {
                for ( p = x->fail ; p && !p->nxt[i] ; p = p->fail ) ;
                x->nxt[i]->fail = p? p->nxt[i]:h ;
                if ( x->nxt[i]->fail->tim ) 
                    x->nxt[i]->tim = 1 ;
                Q.push(x->nxt[i]) ;
            } else {
                if ( x->fail ) x->nxt[i] = x->fail->nxt[i] ;
                else x->nxt[i] = h ;
            }
        }
    }
}

// Builds the tot x tot transition-count matrix of the automaton:
// entry [i][j] is the number of letters that lead from node i to node j,
// counting only transitions whose source and target are both non-forbidden
// (tim == 0). Must be called after get_fail(), because it dereferences every
// nxt[] slot.
Matrix get_Matrix() {
    Matrix trans ;
    trans.N = tot ;
    for ( int from = 1 ; from <= tot ; ++ from ) {
        node* src = dfn[from] ;
        if ( src->tim ) continue ;          // forbidden states have no outgoing edges
        for ( int letter = 0 ; letter < 4 ; ++ letter ) {
            node* dst = src->nxt[letter] ;
            if ( dst->tim ) continue ;      // never step into a forbidden state
            ll& cell = trans.a[from][dst->id] ;
            cell = ( cell + 1 ) % modd ;
        }
    }
    return trans ;
}

// Binary exponentiation: returns rec * a^b reduced modulo modd.
// rec is the starting accumulator (1 by default). When b is 0 the loop never
// runs and rec is returned exactly as passed in.
ll qpow ( ll a, ll b, ll rec = 1 ) {
    while ( b ) {
        if ( b & 1 ) rec *= a ;
        b >>= 1 ;
        a *= a ;
        rec %= modd ;
        a %= modd ;
    }
    return rec ;
}

int main() {
    int i, j, k ;
    scanf ( "%lld%lld", &n, &m ) ;
    if ( !n ) {
        printf ( "%lld\n", qpow(4,m) ) ;
        return 0 ;
    }
    h = new node ;
    h->id = ++tot ;
    dfn[tot] = h ;
    for ( i = 1 ; i <= n ; ++ i ) {
        scanf ( "%s", pat+1 ) ;
        len = strlen(pat+1) ;
        insert() ;
    }
    get_fail() ;
    Matrix A = get_Matrix() ;
    //A.out() ;
    A = A^m ;
    //A.out() ;
    ll ans = 0 ;
    for ( i = 1 ; i <= A.N ; ++ i ) 
            ans = ( ans + A.a[1][i] ) % modd ;
    printf ( "%lld\n", ans ) ;
    return 0 ;
}

你可能感兴趣的:(算法,Sol,字符串,AC自动机,图论)