Keywords Search
Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 65536/32768 K (Java/Others)
Total Submission(s): 25192 Accepted Submission(s): 8240
Problem Description
In modern times, search engines such as Google and Baidu have become part of everybody's life.
Wiskey also wants to bring this feature to his image retrieval system.
Every image has a long description. When users type some keywords to find an image, the system matches the keywords against the image descriptions and shows the image that matches the most keywords.
To simplify the problem, you are given the description of one image and some keywords, and you should tell me how many keywords are matched.
Input
The first line contains one integer: the number of test cases that follow.
Each case begins with one integer N, the number of keywords, followed by N keywords. (N <= 10000)
Each keyword contains only the characters 'a'-'z', and its length is no longer than 50.
The last line of each case is the description, whose length is no longer than 1000000.
Output
Print how many keywords are contained in the description.
Sample Input
1
5
she
he
say
shr
her
yasherhs
Sample Output
3
Author
Wiskey
Recommend
lcy
模版题:AC自动机
注意,输入的keyword可以重复。
#include<cstdio>
#include<cstring>
#include<queue>
using namespace std;
// Alphabet size: keywords and text use the letters 'a'..'z' only.
const int SIGMA_SIZE = 26;
// Capacity of the node arrays below.
const int maxnode = 600000;

// ch[u][c]: child of node u along letter c; 0 means "no such child".
int ch[maxnode][SIGMA_SIZE];
// val[u]: number of inserted keywords ending at node u (0 for non-word
// nodes); print() overwrites it with -1 once the node has been counted.
int val[maxnode];
// f[u]: failure link of node u, filled in by getFail().
int f[maxnode];
// last[u]: nearest node on u's failure chain whose val is non-zero, or 0.
int last[maxnode];

// Number of trie nodes currently in use (node 0 is the root).
int sz;
// Running total of matched keywords for the current test case.
int Count;

// Buffer holding the description text that is searched.
char S[1100000];
// Maps a letter to its column in the transition table: 'a' -> 0, 'b' -> 1, ...
int idx(char c)
{
    const int offset = c - 'a';
    return offset;
}
// Adds to Count every not-yet-counted keyword that ends at node j or at a
// node reachable from j by following last[] links.
//   i - position in the text where the match ends (unused; kept for callers)
//   j - node at which to start collecting
// Each visited node has its val set to -1 so it is never counted twice; the
// walk stops at the root (0) or at the first node already marked -1.
void print(int i,int j)
{
    while(j != 0 && val[j] != -1)
    {
        Count += val[j];
        val[j] = -1;
        j = last[j];
    }
}
// Inserts the string s into the trie and adds v to the counter of the node
// where s ends. v must be non-zero, because a val of 0 means "this node is
// not the end of any keyword".
void insert(char *s,int v)
{
    int node = 0;                       // start at the root
    for(const char *p = s; *p != '\0'; ++p)
    {
        const int c = idx(*p);
        int &child = ch[node][c];
        if(child == 0)
        {
            // No such child yet: take the next free node and clear it.
            memset(ch[sz], 0, sizeof(ch[sz]));
            val[sz] = 0;                // interior nodes carry no keyword
            child = sz;
            ++sz;
        }
        node = child;                   // descend
    }
    val[node] += v;                     // the same keyword may be inserted repeatedly
}
// Computes the failure link f[] and the link last[] for every trie node by
// a breadth-first traversal starting from the root's children.
void getFail()
{
    queue<int> pending;
    f[0] = 0;

    // Seed the queue: every child of the root fails back to the root.
    for(int c = 0; c < SIGMA_SIZE; ++c)
    {
        const int child = ch[0][c];
        if(child == 0)
            continue;
        f[child] = 0;
        last[child] = 0;
        pending.push(child);
    }

    // Process nodes in BFS order, so f[] of a parent is ready before its children.
    while(!pending.empty())
    {
        const int cur = pending.front();
        pending.pop();
        for(int c = 0; c < SIGMA_SIZE; ++c)
        {
            const int child = ch[cur][c];
            if(child != 0)
            {
                pending.push(child);
                // Walk up the parent's failure chain until a node with a
                // c-transition is found (or the root is reached).
                int fallback = f[cur];
                while(fallback != 0 && ch[fallback][c] == 0)
                    fallback = f[fallback];
                const int link = ch[fallback][c];
                f[child] = link;
                // Point at the failure target itself if it ends a keyword,
                // otherwise reuse that target's own last[] link.
                last[child] = (val[link] != 0) ? link : last[link];
            }
        }
    }
}
// Scans the text T through the automaton and, at every position, hands the
// first keyword-carrying node for the current state to print().
void find(char *T)
{
    const int len = strlen(T);
    int state = 0;                      // current node, starting at the root
    for(int pos = 0; pos < len; ++pos)
    {
        const int c = idx(T[pos]);
        // Follow failure links until a c-transition exists or the root is reached.
        while(state != 0 && ch[state][c] == 0)
            state = f[state];
        state = ch[state][c];

        // Report from the state itself if it carries a value, otherwise from
        // its last[] link; a target of 0 means there is nothing to report.
        const int hit = (val[state] != 0) ? state : last[state];
        if(hit != 0)
            print(pos, hit);
    }
}
// Reads the number of test cases; for each case reads N keywords and the
// description, builds the automaton and prints how many keywords were found.
//
// Every scanf result is checked so truncated or malformed input can never
// leave T or N uninitialised, and the %s conversions carry field widths that
// match their buffers (sh holds 99 characters + NUL, S holds 1099999 + NUL)
// so oversized tokens cannot overflow them.
int main()
{
    int T,N;
    char sh[100];
    if(scanf("%d",&T)!=1)
        return 0;
    while(T--)
    {
        if(scanf("%d",&N)!=1)
            return 0;
        Count=0;
        // Reset the trie: only the root (node 0) exists, with no children.
        sz=1; memset(ch[0],0,sizeof(ch[0]));
        for(int i=1;i<=N;i++)
        {
            if(scanf("%99s",sh)!=1)
                return 0;
            insert(sh,1);
        }
        getFail();
        // A missing description is treated as an empty text rather than
        // silently reusing whatever S held from the previous case.
        if(scanf("%1099999s",S)!=1)
            S[0]='\0';
        find(S);
        printf("%d\n",Count);
    }
    return 0;
}