题意:
确定有多少模式串是目标串的子串。
思路:
AC自动机:Aho-Corasick Algorithm (Aho-Corasick Automaton)
学习博客:http://www.notonlysuccess.com/index.php/aho-corasick-automaton/
学习资料:http://www.cs.uku.fi/~kilpelai/BSA05/lectures/slides04.pdf
关于博客中的 ACAutomaton 的一点说明:
1. Construct 函数应该是最难理解的 : trie[u][i] = trie[fail[u]][i]
for (int i = 0; i < CHILD_NUM; ++i)
{
int& v = trie[u][i];
if (v)
{
fail[v] = trie[fail[u]][i];
que.push_back(v);
}
else
v = trie[fail[u]][i];
}
这其实是代码中用到的一个技巧,是为了方便 fail 指针的计算。比如 fail[u] = x,求 fail[v] = trie[fail[u]][m],即 fail[v] = trie[x][m]。
如果 x 节点没有 m 边,但沿着 x 的 fail 链存在带 m 边的更短后缀呢?这时 trie[u][m] = trie[fail[u]][m] 这句话就会起到关键作用:由于按 BFS 顺序处理,trie[x][m] 早已被填成沿 fail 链回退后应到达的节点,相当于预存储的作用。
2. Work() 函数:一旦遇到一个匹配点 p,比如 uhersme 中的 he 匹配了,先把 he 的情况加起来;因为 i 是继续往前走的,如果这时 he 的后缀 e
也是一个模式串,也要考虑进来。这就是代码中 while 循环的作用,也是 AC automaton 的精妙之处:沿 fail 指针查找,看看 e 是否在 trie 中。
int Work(char* word)
{
int ret = 0, p = 0;
for (int i = 0; word[i]; ++i)
{
int m = hashtab[word[i]];
int t = p = trie[p][m];
while (value[t])
{
ret += value[t];
value[t] = 0;
t = fail[t];
}
}
return ret;
}
#include <cstdio>
#include <cstring>
#include <deque>
#include <iostream>
using namespace std;
// Capacity of the node pool, root included.
const int MAX_NODE = 500010;
// Alphabet size: the lowercase letters 'a'..'z'.
const int CHILD_NUM = 26;

/**
 * Aho-Corasick automaton over the lowercase alphabet 'a'..'z'.
 *
 * Usage: Initialize() once, then for every data set
 *   Reset() -> Insert() for each pattern -> Construct() -> Work(text).
 *
 * The object holds several very large fixed-size arrays, so it must live in
 * static storage (a global or a function-local static), never on the stack.
 */
class AcAutomation
{
private:
    int size;                       // number of nodes in use; node 0 is the root
    int trie[MAX_NODE][CHILD_NUM];  // child / goto links; 0 means "no child" before Construct()
    int value[MAX_NODE];            // sum of the keys of all patterns ending at a node
    int fail[MAX_NODE];             // failure link: node of the longest proper suffix present in the trie
    bool counted[MAX_NODE];         // true once Work() has added the node's value to a result
    std::deque<int> que;            // BFS queue used by Construct()
    int hashtab[256];               // unsigned char -> child index, or -1 when outside the alphabet

    // Child index of character c, or -1 when c is not part of the alphabet.
    // The cast keeps the table index non-negative where plain char is signed.
    int IndexOf(char c) const
    {
        return hashtab[static_cast<unsigned char>(c)];
    }

public:
    /// One-time setup: builds the character table and roots the failure link.
    void Initialize()
    {
        fail[0] = 0;
        for (int c = 0; c < 256; ++c)
            hashtab[c] = -1;
        for (int i = 0; i < CHILD_NUM; ++i)
            hashtab['a' + i] = i;
    }

    /// Discards every pattern, leaving only an empty root node.
    void Reset()
    {
        size = 1;
        memset(trie[0], 0, sizeof(trie[0]));
        // Every other node is cleared when Insert() allocates it, so only the
        // root has to be wiped here.
        value[0] = 0;
        counted[0] = false;
        que.clear();
    }

    /**
     * Adds a pattern to the trie. Must be called before Construct().
     *
     * @param word  NUL-terminated pattern made of lowercase letters.
     * @param key   weight added to the pattern's terminal node; inserting the
     *              same pattern again accumulates the keys.
     * @return true when the pattern was stored; false when it contains a
     *         character outside 'a'..'z' or the node pool is exhausted.
     */
    bool Insert(const char* word, int key)
    {
        // Validate first so a rejected pattern never leaves stray nodes behind.
        for (int i = 0; word[i]; ++i)
        {
            if (IndexOf(word[i]) < 0)
                return false;
        }

        int p = 0;
        for (int i = 0; word[i]; ++i)
        {
            const int m = IndexOf(word[i]);
            if (!trie[p][m])
            {
                if (size >= MAX_NODE)
                    return false;  // pool exhausted; the nodes created so far carry no value
                memset(trie[size], 0, sizeof(trie[0]));
                value[size] = 0;
                counted[size] = false;
                trie[p][m] = size++;
            }
            p = trie[p][m];
        }
        value[p] += key;
        return true;
    }

    /**
     * Computes the failure links breadth-first and replaces every missing
     * child edge by the edge of the failure node, so Work() can follow
     * trie[p][m] without ever walking failure links to find a transition.
     *
     * Call exactly once after the last Insert(); the back-filled edges make
     * the trie unsuitable for further Insert() calls until Reset().
     */
    void Construct()
    {
        que.clear();
        for (int i = 0; i < CHILD_NUM; ++i)
        {
            if (trie[0][i])
            {
                fail[trie[0][i]] = 0;
                que.push_back(trie[0][i]);
            }
        }
        while (!que.empty())
        {
            const int u = que.front();
            que.pop_front();
            for (int i = 0; i < CHILD_NUM; ++i)
            {
                int& v = trie[u][i];
                if (v)
                {
                    // fail[u] is shallower than u, so its edges are already final.
                    fail[v] = trie[fail[u]][i];
                    que.push_back(v);
                }
                else
                {
                    v = trie[fail[u]][i];
                }
            }
        }
    }

    /**
     * Scans the text and returns the summed keys of all patterns that occur
     * in it, each pattern node being counted at most once.
     *
     * Counting is consuming: a node that has been counted stays counted until
     * Reset(), so a second scan only reports patterns not seen before.
     *
     * @param word  NUL-terminated text; characters outside 'a'..'z' cannot be
     *              part of any pattern and send the automaton back to the root.
     */
    int Work(const char* word)
    {
        int ret = 0, p = 0;
        for (int i = 0; word[i]; ++i)
        {
            const int m = IndexOf(word[i]);
            if (m < 0)
            {
                p = 0;
                continue;
            }
            p = trie[p][m];
            // Walk the whole failure chain: a pattern may end at a suffix node
            // even when the nodes in between hold no pattern. Stopping at an
            // already counted node is safe because its chain was fully walked
            // when it was counted, which also keeps the scan linear. The root
            // terminates every chain (fail[0] == 0).
            for (int t = p; !counted[t]; t = fail[t])
            {
                counted[t] = true;
                ret += value[t];
            }
        }
        return ret;
    }
};
// The single automaton instance used by main(). It is a global because the
// class embeds fixed-size arrays dimensioned by MAX_NODE (the trie alone is
// MAX_NODE * CHILD_NUM ints).
AcAutomation Ac;
// Buffer that main() reads each test case's text into.
char word[1000010];
// Reads the number of test cases; every case consists of a pattern count n,
// n patterns and one text, and prints how many patterns occur in the text.
// Processing stops quietly as soon as the input is truncated or malformed.
int main()
{
    int cases;
    if (scanf("%d", &cases) != 1)
        return 0;
    Ac.Initialize();
    while (cases-- > 0)
    {
        int n;
        if (scanf("%d", &n) != 1)
            break;
        Ac.Reset();
        char is[56];
        for (int i = 0; i < n; ++i)
        {
            // The field width keeps an over-long token from overflowing `is`.
            if (scanf("%55s", is) != 1)
                return 0;
            Ac.Insert(is, 1);
        }
        Ac.Construct();
        // The field width is sizeof(word) - 1, leaving room for the terminator.
        if (scanf("%1000009s", word) != 1)
            break;
        printf("%d\n", Ac.Work(word));
    }
    return 0;
}