UVA LA 4670(AC自动机)

 

 

The archaeologists are going to decipher a very mysterious ``language". Now, they know many  language patterns; each pattern can be treated as a string on English letters (only lower  case). As a sub string, these patterns may appear more than one times in a large text  string (also only lower case English letters).

What matters most is that which patterns are the dominating patterns. Dominating pattern  is the pattern whose appearing times is not less than other patterns.

It is your job to find the dominating pattern(s) and their appearing times.

Input

The entire input contains multiple cases. The first line of each case is an integer, which is the number of patterns N, 1 ≤ N ≤ 150. Each of the following N lines contains one pattern, whose length is in range [1, 70]. The rest of the case is one line containing a large string as the text to look up, whose length is up to 10^6.

At the end of the input file, number `0' indicates the end of input file.

Output

For each of the input cases, output the appearing times of the dominating pattern(s). If  there are more than one dominating pattern, output them in separate lines; and keep their  input order to the output.

Sample Input

2 
aba 
bab 
ababababac 
6 
beta 
alpha 
haha 
delta 
dede 
tata 
dedeltalphahahahototatalpha 
0

Sample Output

4 
aba 
2 
alpha 
haha

 

 

刚在写这个题的时候发生了一个严重的错误=  T_T,在叉姐的群里讨论了下,输入会有重复串的==  目测我的结果被覆盖了,比如输入2 ab ab ab时应该输出1 ab ab,可是我的代码只会输出1 ab,这种重复串覆盖的现象好像是个经典Trick。。  【听刘大师讲的-。-   先贴上不能处理重复串的代码吧,具体细节我会自己在想的,想好了再把能A的代码贴过来==  T_T

/****************************
* author:crazy_石头
* date:2014/01/12
* algorithm:Aho-Corasick自动机
* Pro:LA 4670-Dominating Patterns
*****************************/
#include <iostream>
#include <cstdlib>
#include <cstring>
#include <cstdio>
#include <algorithm>
#include <queue>

using namespace std;

// Helper macros. INF is parenthesised so that it expands safely inside a
// larger expression (the bare `1<<29` form turns `INF-1` into `1<<28`).
#define INF (1<<29)
#define eps 1e-8
#define A system("pause")
#define rep(i,h,n) for(int i=(h);i<(n);i++)
#define ms(a,b) memset((a),(b),sizeof(a))

// Problem limits: at most 150 patterns, each at most 70 characters long.
const int max_patterns=150+10;
const int max_pattern_len=70;

// maxm sizes the automaton's per-node tables, so it has to bound the number
// of trie nodes (every pattern character may create one node, plus the root),
// not merely the number of patterns.
const int maxm=150*70+10;
// Capacity of the text buffer (text length is up to 10^6).
const int maxn=1000000+10;
// Alphabet size: patterns and text consist of lowercase letters only.
const int sigma_size=26;
const int maxlen=2000000+5;

// Pattern storage; each row leaves room for the terminating NUL of a
// 70-character pattern.
char str[max_patterns][max_pattern_len+1];
// Text to be searched.
char ch[maxn];
// Number of patterns in the current test case.
int n;

/**
 * Aho-Corasick automaton over the lowercase letters 'a'..'z'.
 *
 * Usage: init(), insert() every pattern with ids 0..n-1, build_AC(), then
 * match() a text. match() prints the highest occurrence count followed by
 * every pattern reaching it, in id order.
 *
 * Differences from the first draft:
 *  - the node tables are sized by the maximum number of trie nodes instead of
 *    the number of patterns, so long patterns no longer overflow them;
 *  - identical patterns share one trie node; the first id inserted owns the
 *    node and later duplicates are mapped to it through canon[], so every
 *    copy of a dominating pattern is reported.
 *
 * All input characters are assumed to be in 'a'..'z'; other characters are
 * not validated.
 */
struct AC_machine
{
    enum
    {
        kAlphabet    = 26,          // lowercase letters only
        kMaxPatterns = 150 + 10,    // problem limit: N <= 150
        kMaxNodes    = 150 * 70 + 10 // one node per pattern character, plus root
    };

    // next: goto table (completed with failure transitions by build_AC)
    // fail: failure link of each node
    // end:  id of the first pattern ending at the node, or -1
    // hash: occurrence count per pattern id (filled by match)
    int next[kMaxNodes][kAlphabet],fail[kMaxNodes],end[kMaxNodes],hash[kMaxPatterns];
    // canon[id]: id of the first pattern with the same text as pattern id.
    int canon[kMaxPatterns];
    // pattern[id]: the string passed to insert(); it is not copied, so the
    // caller must keep it alive until match() has printed the result.
    const char *pattern[kMaxPatterns];
    int L,root; // L: number of nodes in use; root: index of the root node

    /** Allocates a fresh node with no children and no pattern; returns its index. */
    inline int newnode()
    {
        for(int c=0;c<kAlphabet;c++)
            next[L][c]=-1;
        end[L++]=-1;
        return L-1;
    }

    /** Resets the automaton to a single root node. */
    inline void init()
    {
        L=0;
        root=newnode();
    }

    /**
     * Adds one pattern to the trie.
     * @param str NUL-terminated lowercase pattern (must outlive match()).
     * @param id  index of the pattern, 0 <= id < kMaxPatterns.
     */
    inline void insert(char str[],int id)
    {
        // Ids outside the tables are ignored rather than written out of bounds.
        if(id<0||id>=kMaxPatterns)
            return;
        int now=root;
        for(const char *p=str;*p!='\0';++p)
        {
            const int c=*p-'a';
            if(next[now][c]==-1)
                next[now][c]=newnode();
            now=next[now][c];
        }
        // Keep the first owner of a terminal node; a duplicate pattern must
        // not overwrite it, it only records which id carries its count.
        if(end[now]==-1)
            end[now]=id;
        canon[id]=end[now];
        pattern[id]=str;
    }

    /** Computes failure links breadth-first and fills in missing transitions. */
    inline void build_AC()
    {
        std::queue<int>q;
        fail[root]=root;
        for(int i=0;i<kAlphabet;i++)
        {
            if(next[root][i]==-1)
                next[root][i]=root;
            else
            {
                fail[next[root][i]]=root;
                q.push(next[root][i]);
            }
        }
        while(!q.empty())
        {
            int now=q.front();
            q.pop();
            for(int i=0;i<kAlphabet;i++)
            {
                if(next[now][i]==-1)
                    next[now][i]=next[fail[now]][i];
                else
                {
                    fail[next[now][i]]=next[fail[now]][i];
                    q.push(next[now][i]);
                }
            }
        }
    }

    /** Returns the larger of a and b. */
    inline int max(int a,int b)
    {
        return a>b?a:b;
    }

    /**
     * Counts the occurrences of every inserted pattern in ch and prints the
     * best count followed by all patterns that reach it.
     * @param ch NUL-terminated lowercase text.
     * @param n  number of inserted patterns (ids 0..n-1); nothing is printed
     *           when n <= 0.
     */
    inline void match(char ch[],int n)
    {
        if(n<=0)
            return;
        for(int i=0;i<n;i++)
            hash[i]=0;
        int now=root;
        for(const char *p=ch;*p!='\0';++p)
        {
            now=next[now][*p-'a'];
            // Walk the failure chain so patterns that are suffixes of the
            // current match are counted as well.
            for(int temp=now;temp!=root;temp=fail[temp])
            {
                if(end[temp]!=-1)
                    hash[end[temp]]++;
            }
        }
        // Duplicates share the count of the id that owns their trie node.
        // canon[i] <= i and canon[canon[i]] == canon[i], so the owner's count
        // is already final when it is copied.
        for(int i=0;i<n;i++)
            hash[i]=hash[canon[i]];
        int best=hash[0];
        for(int i=1;i<n;i++)
            best=max(hash[i],best); // highest occurrence count
        printf("%d\n",best);
        for(int i=0;i<n;i++)
        {
            if(hash[i]==best)
                printf("%s\n",pattern[i]);
        }
    }

    /** Dumps every node (failure link, pattern id, transitions) for debugging. */
    inline void debug()
    {
        for(int i=0;i<L;i++)
        {
            printf("id = %3d,fail = %3d,end = %3d,chi = [",i,fail[i],end[i]);
            for(int j=0;j<kAlphabet;j++)
                printf("%2d",next[i][j]);
            printf("]\n");
        }
    }
}AC;

int main()
{
    while(~scanf("%d",&n),n)
    {
        AC.init();
        rep(i,0,n)
        {
            scanf("%s",str[i]);
            AC.insert(str[i],i);
        }
        AC.build_AC();
        scanf("%s",ch);
        AC.match(ch,n);
    }
    return 0;
}


 

 前面没处理好重复串输入的现象,然后LRJ书上建议建立map映射存储id,因为直接存储可能会覆盖,虽然我还没想清楚【太弱;

然后稍微改了下就1A了==

 

AC代码:

/****************************
* author:crazy_石头
* date:2014/01/12
* algorithm:Aho-Corasick自动机
* Pro:LA 4670-Dominating Patterns
*****************************/
#include <iostream>
#include <cstdlib>
#include <cstring>
#include <cstdio>
#include <algorithm>
#include <queue>
#include <map>
#include <string>

using namespace std;

// Helper macros. INF is parenthesised so that it expands safely inside a
// larger expression (the bare `1<<29` form turns `INF-1` into `1<<28`).
#define INF (1<<29)
#define eps 1e-8
#define A system("pause")
#define rep(i,h,n) for(int i=(h);i<(n);i++)
#define ms(a,b) memset((a),(b),sizeof(a))

// Row length of the pattern buffer (patterns are at most 70 characters).
const int maxm=70+5;
// Capacity of the automaton's per-node tables (150 patterns * 70 characters
// need at most 10501 nodes).
const int maxn=15000+10;
// Alphabet size: lowercase letters only.
const int sigma_size=26;
// Capacity of the text buffer (text length is up to 10^6).
const int maxlen=1000000+5;
// Number of pattern rows; the problem allows at most 150 patterns, so the
// pattern buffer no longer needs one row per trie node.
const int max_patterns=150+10;

// Pattern storage, one row per pattern.
char str[max_patterns][maxm];
// Text to be searched.
char ch[maxlen];
// Number of patterns in the current test case.
int n;

// Maps a pattern's text to the id stored in the trie for it, so duplicate
// patterns resolve to the same counter.
std::map<std::string,int>mp;
/**
 * Aho-Corasick automaton over sigma_size letters starting at 'a'.
 *
 * Usage: init(), insert() every pattern, build_AC(), then match() a text.
 * Identical patterns share one trie node; the global map `mp` remembers which
 * id is stored for each pattern text, so all copies read the same counter.
 */
struct AC_machine
{
    // next: goto table; fail: failure links; end: pattern id stored at a
    // node (-1 if none); hash: occurrence count per pattern id.
    int next[maxn][sigma_size],fail[maxn],end[maxn],hash[maxn];
    int L,root; // L: nodes in use; root: index of the root node

    /** Allocates a node without children or pattern and returns its index. */
    inline int newnode()
    {
        const int created=L++;
        for(int c=0;c<sigma_size;++c)
            next[created][c]=-1;
        end[created]=-1;
        return created;
    }

    /** Clears the pattern map and resets the trie to a lone root. */
    inline void init()
    {
        mp.clear();
        L=0;
        root=newnode();
    }

    /**
     * Inserts a pattern and records id both at its terminal node and in mp.
     * A later identical pattern replaces the id stored for that text.
     */
    inline void insert(char str[],int id)
    {
        int cur=root;
        for(const char *p=str;*p!='\0';++p)
        {
            const int letter=*p-'a';
            if(next[cur][letter]==-1)
                next[cur][letter]=newnode();
            cur=next[cur][letter];
        }
        end[cur]=id;
        mp[std::string(str)]=id;
    }

    /** Builds failure links breadth-first and completes missing transitions. */
    inline void build_AC()
    {
        std::queue<int> pending;
        fail[root]=root;
        for(int c=0;c<sigma_size;++c)
        {
            const int child=next[root][c];
            if(child!=-1)
            {
                fail[child]=root;
                pending.push(child);
            }
            else
            {
                next[root][c]=root;
            }
        }
        while(!pending.empty())
        {
            const int cur=pending.front();
            pending.pop();
            const int fallback=fail[cur];
            for(int c=0;c<sigma_size;++c)
            {
                const int viaFail=next[fallback][c];
                const int child=next[cur][c];
                if(child!=-1)
                {
                    fail[child]=viaFail;
                    pending.push(child);
                }
                else
                {
                    // No real child: borrow the transition of the failure node.
                    next[cur][c]=viaFail;
                }
            }
        }
    }

    /** Returns the larger of the two arguments. */
    inline int max(int a,int b)
    {
        if(a>b)
            return a;
        return b;
    }

    /**
     * Counts pattern occurrences in ch, prints the best count and then every
     * one of the n patterns whose stored id reaches that count.
     */
    inline void match(char ch[],int n)
    {
        for(int id=0;id<n;++id)
            hash[id]=0;

        int state=root;
        for(const char *p=ch;*p!='\0';++p)
        {
            state=next[state][*p-'a'];
            // Follow failure links to count every pattern ending here.
            for(int v=state;v!=root;v=fail[v])
            {
                if(end[v]!=-1)
                    ++hash[end[v]];
            }
        }

        int best=hash[0];
        for(int id=1;id<n;++id)
            best=max(hash[id],best); // highest occurrence count
        printf("%d\n",best);

        for(int id=0;id<n;++id)
        {
            // Look the text up in mp so duplicates use the shared counter.
            const int owner=mp[std::string(str[id])];
            if(hash[owner]==best)
                printf("%s\n",str[id]);
        }
    }

    /** Prints every node's failure link, stored id and transitions. */
    inline void debug()
    {
        for(int node=0;node<L;++node)
        {
            printf("id = %3d,fail = %3d,end = %3d,chi = [",node,fail[node],end[node]);
            for(int c=0;c<sigma_size;++c)
                printf("%2d",next[node][c]);
            printf("]\n");
        }
    }
}AC;

int main()
{
    while(~scanf("%d",&n),n)
    {
        AC.init();
        rep(i,0,n)
        {
            scanf("%s",str[i]);
            AC.insert(str[i],i);
        }
        AC.build_AC();
        scanf("%s",ch);
        AC.match(ch,n);
    }
    return 0;
}


 

 

你可能感兴趣的:(UVA LA 4670(AC自动机))