Poj 2778 DNA Sequence

题目链接:http://poj.org/problem?id=2778

题目类型:AC自动机 + 矩阵快速幂 + DP

题意:给定若干病毒模式串(仅由A、C、G、T组成),问长度为N的所有DNA串中,有多少个不包含任何病毒串作为子串,答案对100000取模。

数据范围N最大为2000000000

这道题的解题思路可以参考:http://blog.henix.info/blog/poj-2778-aho-corasick-dp.html

和:http://www.matrix67.com/blog/archives/276

关键在于对AC自动机的失效函数稍作改动:如果某个结点的失效指针指向的结点是病毒串的结尾(危险结点),那么该结点本身也要标记为危险结点。(我理解的失效函数有好几种写法,这里先在 getFail() 的注释中记录三种,可依据问题不同而选用不同的方式)

#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <map>
#include <queue>
#include <algorithm>
using namespace std;

// Alphabet size: idx() maps 'A', 'C', 'G', 'T' to the child slots 0..3.
#define SIGMA_SIZE 4
// Capacity of the automaton's per-node arrays (ch, f, val, last).
#define MAXNODE 105
// Size of the global `text` buffer.
#define TEXT_SIZE 2000
// Size of the global pattern buffer `p` (including the terminating NUL).
#define P_SIZE 15
// Size of the automaton's cnt[] array.
#define P_NUM 15

// Side length of the fixed backing store of Matrix.
#define MATRIX_SIZE 105
// Modulus applied to matrix products (and to the final answer in main()).
#define MOD 100000
/*
 * Square matrix of long long entries reduced modulo MOD.
 *
 * Storage is a fixed MATRIX_SIZE x MATRIX_SIZE array; only the top-left
 * size x size corner takes part in assignment, multiplication and Power().
 * Entries are expected to be in [0, MOD) so that a single product fits
 * comfortably in a long long.
 */
struct Matrix
{
   long long int elem[MATRIX_SIZE][MATRIX_SIZE];
   int size;

   // Builds an all-zero matrix with logical size 0; call setSize() next.
   Matrix() : size(0) { memset(elem,0,sizeof(elem)); }

   // Sets the logical side length (must not exceed MATRIX_SIZE).
   void setSize(int _size)
   {
      size = _size;
   }

   // Copies the logical size x size corner of `other`.
   // Returns a reference (not a copy) so an assignment does not trigger an
   // extra copy of the whole backing array.
   Matrix & operator = (const Matrix & other)
   {
      if(this == &other) return *this;
      setSize(other.size);
      for(int i=0;i<size;i++)
      {
         for(int j=0;j<size;j++)
         {
            elem[i][j] = other.elem[i][j];
         }
      }
      return *this;
   }

   // Returns (*this) * other with every entry reduced modulo MOD.
   // Both operands are assumed to share the same logical size.
   Matrix operator * (const Matrix & other) const
   {
      Matrix temp;
      temp.setSize(size);
      for(int i=0;i<size;i++)
      {
         for(int k=0;k<size;k++)
         {
            const long long int a = elem[i][k];
            // A zero factor contributes nothing; skipping it keeps the
            // result identical while avoiding the inner loop entirely.
            if(a == 0) continue;
            for(int j=0;j<size;j++)
            {
               temp.elem[i][j] += a * other.elem[k][j];
               if(temp.elem[i][j]>=MOD) temp.elem[i][j] %= MOD;
            }
         }
      }
      return temp;
   }

   // Replaces *this with (*this)^exp using binary exponentiation.
   // exp == 0 yields the identity matrix; a negative exp is treated like 0
   // (the previous loop never terminated for negative values).
   void Power(int exp)
   {
      Matrix E;
      E.setSize(size);
      for(int i=0;i<size;i++) E.elem[i][i] = 1;
      while(exp > 0)
      {
         if(exp & 1) E = E * (*this);
         exp >>= 1;
         // Square only while bits remain; the final squaring is never used.
         if(exp > 0) *this = (*this) * (*this);
      }
      *this = E;
   }
};
/*
 * Aho-Corasick automaton over the four-letter alphabet handled by idx().
 *
 * Node 0 is the root. After getFail() every ch[u][c] is a complete
 * transition (missing trie edges are redirected through the failure links)
 * and val[u] is non-zero for every node whose string ends with an inserted
 * pattern. work() turns the automaton into a transition-count matrix over
 * the nodes whose val is zero.
 */
struct AhoCorsickAutomata
{
   int cnt[P_NUM];               // cnt[v]: hits counted by print() for value v
   int sz;                       // number of nodes in use (root included)
   int ch[MAXNODE][SIGMA_SIZE];  // child / transition table
   int f[MAXNODE];               // failure links
   int val[MAXNODE];             // non-zero: a pattern ends at (a suffix of) this node
   int last[MAXNODE];            // suffix link to a node with val set (only used by find/print)

   // Resets the automaton to a lone root node.
   // Every per-node array is cleared so that the object does not rely on
   // static zero-initialisation and no state leaks between test cases.
   void init()
   {
      sz = 1;
      memset(ch[0],0,sizeof(ch[0]));
      memset(cnt,0,sizeof(cnt));
      memset(f,0,sizeof(f));
      memset(val,0,sizeof(val));
      memset(last,0,sizeof(last));
   }

   // Maps a nucleotide letter to its child slot 0..3.
   // Returns -1 for any other character (previously control fell off the
   // end of the function, which is undefined behaviour).
   int idx(char c)
   {
      switch(c)
      {
         case 'A': return 0;
         case 'C': return 1;
         case 'G': return 2;
         case 'T': return 3;
         default:  return -1;
      }
   }

   // Inserts pattern s into the trie and stores v at its final node.
   // A pattern containing a character outside A/C/G/T is ignored, and a
   // pattern that would need more than MAXNODE nodes is dropped instead of
   // writing past the node arrays.
   void insert(char *s,int v)
   {
      int n = strlen(s);
      for(int i=0; i<n; i++)
      {
         if(idx(s[i]) < 0) return;
      }
      int u = 0;
      for(int i=0; i<n; i++)
      {
         int c = idx(s[i]);
         if(!ch[u][c])
         {
            if(sz >= MAXNODE) return;
            memset(ch[sz],0,sizeof(ch[sz]));
            val[sz] = 0;
            ch[u][c] = sz++;
         }
         u = ch[u][c];
      }
      val[u] = v;
   }

   // Counts the value stored at node j and at every node reachable from it
   // through the last[] chain.
   void print(int j)
   {
      if(j)
      {
         // getFail() ORs values together, so val[j] is not guaranteed to be
         // a valid cnt[] index; skip anything out of range.
         if(val[j] >= 0 && val[j] < P_NUM) cnt[val[j]]++;
         print(last[j]);
      }
   }

   // Runs text T through the automaton and records hits via print().
   // A character outside the alphabet cannot extend any match, so the scan
   // restarts from the root after it.
   // Note: the getFail() variant currently enabled does not maintain
   // last[], so only hits flagged through val[] are reported.
   void find(char *T)
   {
      int n = strlen(T);
      int j = 0;
      for(int i=0; i<n; i++)
      {
         int c = idx(T[i]);
         if(c < 0)
         {
            j = 0;
            continue;
         }
         while(j && !ch[j][c]) j = f[j];
         j = ch[j][c];
         if(val[j]) print(j);
         else if(last[j]) print(last[j]);
      }
   }

   // Computes the failure links breadth-first from the root.
   // Three ways of writing the inner step are recorded below; the third
   // one is the active one.
   void getFail()
   {
      std::queue<int> q;
      f[0] = 0;
      for(int c = 0; c<SIGMA_SIZE; c++)
      {
         int u = ch[0][c];
         if(u)
         {
            f[u] = 0;
            q.push(u);
            last[u] = 0;
         }
      }
      while(!q.empty())
      {
         int r = q.front();
         q.pop();
         for(int c = 0; c<SIGMA_SIZE; c++)
         {
            // The first way: last[] is maintained and the failure chain is
            // walked repeatedly when looking for a transition.
            //
            //    int u = ch[r][c];
            //    if(!u) continue;
            //    q.push(u);
            //    int v = f[r];
            //    while(v && !ch[v][c]) v = f[v];
            //    f[u] = ch[v][c];
            //    last[u] = val[f[u]] ? f[u] : last[f[u]];
            //
            // The second way: last[] is maintained, and missing edges are
            // filled in so that every transition is looked up the same way.
            //
            //    int u = ch[r][c];
            //    if(!u)
            //    {
            //       ch[r][c] = ch[f[r]][c];
            //       continue;
            //    }
            //    q.push(u);
            //    int v = f[r];
            //    f[u] = ch[v][c];
            //    last[u] = val[f[u]] ? f[u] : last[f[u]];
            //
            // The third way (active): last[] is ignored because we only
            // care whether the current node is a key (pattern-ending) node.
            int u = ch[r][c];
            if(!u)
            {
               // Missing edge: reuse the transition of the failure node,
               // which is shallower and therefore already complete.
               ch[r][c] = ch[f[r]][c];
               continue;
            }
            q.push(u);
            int v = f[r];
            f[u] = ch[v][c];
            // A node is a key node as soon as its failure target is one.
            val[u] |= val[f[u]];
         }
      }
   }

   // Builds the sz x sz matrix whose entry [u][v] is the number of letters
   // that move the automaton from node u to node v, counting only nodes
   // with val == 0 on both ends. Call after getFail().
   Matrix work()
   {
      Matrix temp;
      temp.setSize(sz);
      for(int u=0;u<sz;u++)
      {
         if(val[u]) continue;
         for(int j=0;j<SIGMA_SIZE;j++)
         {
            int v = ch[u][j];
            if(val[v]) continue;
            temp.elem[u][v] += 1;
         }
      }
      return temp;
   }
};
// Single automaton instance; main() re-initialises it for every test case.
AhoCorsickAutomata ac;
// Text buffer; not referenced by any code visible in this file.
char text[TEXT_SIZE];
// Receives one pattern string at a time from scanf in main().
char p[P_SIZE];
/*
 * Reads test cases of the form "m n" followed by m pattern strings until the
 * input is exhausted. For each case it builds the automaton, raises its
 * transition matrix to the n-th power and prints the sum of row 0 modulo MOD.
 */
int main()
{
   #ifndef ONLINE_JUDGE
      freopen("in.txt","r",stdin);
   #endif
   int m,n;
   // Require both numbers: comparing against EOF alone would spin forever
   // on malformed input, where scanf returns 0 without consuming anything.
   while(scanf(" %d %d",&m,&n) == 2)
   {
      ac.init();
      for(int i=1;i<=m;i++)
      {
         // Field width 14 keeps the read inside p (P_SIZE is 15, one byte
         // is needed for the terminating NUL). Stop on truncated input.
         if(scanf(" %14s",p) != 1) return 0;
         ac.insert(p,1);
      }
      ac.getFail();
      Matrix mat = ac.work();
      mat.Power(n);

      // Row 0 holds the number of length-n walks that start at the root.
      long long int ans = 0;
      for(int i=0;i<mat.size;i++) ans += mat.elem[0][i];

      printf("%lld\n",ans % MOD );
   }
   return 0;
}


你可能感兴趣的:(Poj 2778 DNA Sequence)