算法导论—AC自动机

华电北风吹
日期:2016-05-03

AC自动机是一种比较高效的多模式匹配算法。类似于KMP算法在单个模式串上建立状态转移,AC自动机在由所有模式串构成的trie树上建立状态转移,使得只需对文本串遍历一遍,就可以找到其中出现的所有模式串。
AC自动机一般有以下三步:首先,对所有的模式串建立trie树。然后,为trie树的每个节点建立失配指针:它指向该节点所代表字符串的最长真后缀(要求该后缀同时是某个模式串的前缀)在trie树中对应的节点,由此得到AC自动机。最后一步,让文本串在AC自动机上进行匹配。

题目链接:
http://hihocoder.com/problemset/problem/1036?sid=786758

参考代码:

#include <iostream>
#include <queue>
#include <string>
#include <string.h>
using namespace std;

// Number of child slots per trie node: one for each lowercase letter 'a'..'z'.
// NOTE: because this macro is named `size`, the preprocessor rewrites every later
// `size` token in this file, so member calls such as `.size()` cannot be used below.
#define size 26

struct node
{
    node *fail;          //失败指针
    node *next[size];    //Tire每个节点有26个子节点,分别对应26个英文字母
    int count;           //该节点是否为单词的末尾节点
    node()
    {
        fail = NULL;
        count = 0;
        memset(next, NULL, sizeof(next));
    }
};

void Insert(node *root, string str)
{
    node *p = root;
    int i = 0, index;
    for (int i = 0; i < str.length(); i++)
    {
        index = str[i] - 'a';
        if (p->next[index] == NULL)
            p->next[index] = new node();
        p = p->next[index];
    }
    p->count++;
}
// Assigns the `fail` pointer of every node reachable from `root` by walking the
// trie breadth-first. A child reached from `parent` by letter c gets, as its
// failure link, the c-child of the first node on parent's failure chain that has
// such a child; if the chain runs out (always the case for children of the root),
// the link is the root itself. The root's own link is NULL.
void GetFail(node *root)
{
    root->fail = NULL;

    queue<node*> pending;
    pending.push(root);
    while (!pending.empty())
    {
        node *parent = pending.front();
        pending.pop();

        for (int letter = 0; letter < size; ++letter)
        {
            node *child = parent->next[letter];
            if (child == NULL)
                continue;

            // Fall back to the root unless some ancestor on the failure chain
            // can also be extended by this letter.
            node *link = root;
            for (node *anc = parent->fail; anc != NULL; anc = anc->fail)
            {
                if (anc->next[letter] != NULL)
                {
                    link = anc->next[letter];
                    break;
                }
            }
            child->fail = link;

            pending.push(child);
        }
    }
}
bool Query(node *root, string str)
{
    int cnt = 0, index;
    node *p = root;
    for (int i = 0; i < str.length(); i++)
    {
        index = str[i] - 'a';
        while (p->next[index] == NULL && p != root)
            p = p->fail;
        p = p->next[index];
        p = (p == NULL) ? root : p;
        node *temp = p;
        while (temp != root)
        {
            if (temp->count != 0)
                return true;
            else
                temp = temp->fail;
        }
    }
    return false;
}
// Reads a pattern count n, then n patterns, then one text from standard input,
// and prints "YES" if any pattern occurs in the text, otherwise "NO".
// The trie is never freed; it lives until the process exits.
int main()
{
    // Only C++ streams are used here, so drop the synchronisation with C stdio
    // and the cin/cout tie; both slow down reading large inputs.
    cin.sync_with_stdio(false);
    cin.tie(NULL);

    int n = 0;                       // stays 0 if the count cannot be read
    node *root = new node();
    cin >> n;
    string keyword;
    // `n-- > 0` stops immediately for a negative count; the stream test stops
    // at end of input instead of re-inserting a stale keyword.
    while (n-- > 0 && cin >> keyword)
        Insert(root, keyword);
    GetFail(root);
    string str;
    cin >> str;
    if (Query(root, str))
        cout << "YES" << endl;
    else
        cout << "NO" << endl;
    return 0;
}

上面的代码在理论上是正确的,但是如果拿这个去提交的话会超时。把参考代码第84行(即Query函数中的 while (temp != root))修改为 if 以后就可以通过了,但是修改以后的代码在逻辑上是错误的:它只检查当前节点,不再沿着失配指针检查以当前位置结尾的更短的模式串。

下面给一个AC自动机功能扩展代码:

#include <iostream>
#include <queue>
#include <vector>
#include <string>
#include <string.h>
#include <fstream>
using namespace std;

// Number of child slots per trie node: one for each lowercase letter 'a'..'z'.
// NOTE: because this macro is named `size`, the preprocessor rewrites every later
// `size` token in this file, so member calls such as `.size()` cannot be used below.
#define size 26

struct ACNode
{
    ACNode *fail;          //失败指针
    ACNode *next[size];    //Tire每个节点有26个子节点,分别对应26个英文字母
    int count;             //该节点是否为单词的末尾节点,也可用于判断模式是否重复
    int patternNo;         //该节点是第几个模式,查找的时候用于还原用于
    ACNode()
    {
        fail = NULL;
        count = 0;
        patternNo = -1;
        memset(next, NULL, sizeof(next));
    }
};
void Insert(ACNode *root, string str, int patterNo)
{
    ACNode *p = root;
    int i = 0, index;
    for (int i = 0; i < str.length(); i++)
    {
        index = str[i] - 'a';
        if (p->next[index] == NULL)
            p->next[index] = new ACNode();
        p = p->next[index];
    }
    p->count++;
    p->patternNo = patterNo;
}
// Assigns the `fail` pointer of every node reachable from `root` by walking the
// trie breadth-first. A child reached from `parent` by letter c gets, as its
// failure link, the c-child of the first node on parent's failure chain that has
// such a child; if the chain runs out (always the case for children of the root),
// the link is the root itself. The root's own link is NULL.
void GetFail(ACNode *root)
{
    root->fail = NULL;

    queue<ACNode*> pending;
    pending.push(root);
    while (!pending.empty())
    {
        ACNode *parent = pending.front();
        pending.pop();

        for (int letter = 0; letter < size; ++letter)
        {
            ACNode *child = parent->next[letter];
            if (child == NULL)
                continue;

            // Fall back to the root unless some ancestor on the failure chain
            // can also be extended by this letter.
            ACNode *link = root;
            for (ACNode *anc = parent->fail; anc != NULL; anc = anc->fail)
            {
                if (anc->next[letter] != NULL)
                {
                    link = anc->next[letter];
                    break;
                }
            }
            child->fail = link;

            pending.push(child);
        }
    }
}
int Query(ACNode *root, string str, vector<string> &keySet)
{
    int cnt = 0, index;
    ACNode *p = root;
    for (int i = 0; i < str.length();i++)
    {
        index = str[i] - 'a';
        while (p->next[index] == NULL && p != root)
            p = p->fail;
        p = p->next[index];
        p = (p == NULL) ? root : p;
        ACNode *temp = p;
        while (temp != root)
        {
            if (temp->count>0)
            {
                int patternNo = temp->patternNo;
                int patternLength = keySet[patternNo].length();
                cout << i - patternLength + 1 << " " << keySet[patternNo] << endl;
                cnt += temp->count;
            }
            temp = temp->fail;
        }
    }
    return cnt;
}
// Reads a pattern count n, then n patterns, then one text, and prints every
// occurrence of a pattern in the text (see Query). Input comes from the file
// ".\input.txt" when it can be opened, otherwise from standard input.
// The trie is never freed; it lives until the process exits.
int main()
{
    ifstream in(".\\input.txt");
    // Redirect cin only when the file really opened; reading through an
    // unopened file buffer would make every extraction fail silently.
    std::streambuf *stdinBuf = NULL;
    if (in.is_open())
        stdinBuf = cin.rdbuf(in.rdbuf());

    int n = 0;                       // stays 0 if the count cannot be read
    ACNode *root = new ACNode();
    cin >> n;
    string keyword;
    vector<string> keySet;
    // Stop at end of input instead of storing a stale keyword again.
    for (int i = 0; i < n && cin >> keyword; i++)
    {
        keySet.push_back(keyword);
        Insert(root, keyword, i);
    }
    GetFail(root);
    string str;
    cin >> str;
    Query(root, str, keySet);

    // `in` is destroyed when main returns while cin outlives it, so give cin
    // its own buffer back rather than leaving it pointing at a dead one.
    if (stdinBuf != NULL)
        cin.rdbuf(stdinBuf);
    return 0;
}

参考博客:
http://www.cppblog.com/mythit/archive/2009/04/21/80633.html
http://www.cnblogs.com/xudong-bupt/p/3433506.html

你可能感兴趣的:(算法导论—AC自动机)