poj4052

题意:求一个文章(长度5.1e6)里面出现了多少个指定的模式串。重复出现只记一次。而且如果两个模式串都出现的情况下,一个是另一个的子串,则该子串不算出现过。

分析:AC自动机。

由于子串不算所以加一些特殊处理:

1.在文章匹配过程中,如果出现了一个模式串,我们不是把匹配数量+1,而是把它标记为出现过(vis[id] = true;代码中实际存入的是该模式串结尾节点的编号,非零即表示出现过)。当然,Trie树中也记录了每个模式串的id。

2.在匹配结束后,我们遍历所有出现过的模式串,在Trie树中找到其所有出现过的子串,并将它们标为未出现过(vis[id] = false,即代码中的 vis[id] = 0)。

要如何查找子串呢?

只需要记录每个出现过的串所对应的Trie树中的节点位置,由该节点向上走到root。其间走过的每个节点都沿着fail指针走到root一次。这样二重循环遍历到的所有节点就对应了Trie中所有该模式串的子串。

因为在AC自动机中父节点指针就是找前缀,fail指针就是找后缀。

poj4052
#include <cstdio>

#include <cstring>

#include <queue>

#include <algorithm>

#include <cctype>

using namespace std;



// Debug hook: D(x) expands to nothing, so any statement wrapped in D(...) is compiled out.
#define D(x)

// Capacity of the two global character buffers st and st2 declared below.
const int MAX_LEN = (int)(5.1e6) + 10;

// Capacity of vis[]; pattern ids are 1-based (input() numbers them 1..n), so n must stay below this.
const int MAX_N = 2505;

// Only used to size the node pool below; presumably the maximum expanded pattern length - TODO confirm.
const int MAX_FINGER_LEN = 1105;

// Number of outgoing edges per trie node (second dimension of Trie::next).
const int MAX_CHILD_NUM = 26;

// Size of every per-node array in Trie and of check[].
const int MAX_NODE_NUM = MAX_N * MAX_FINGER_LEN;



// Number of patterns in the current test case (read by input()).
int n;

// Raw token as read by scanf (still in bracket-compressed form).
char st[MAX_LEN];

// Expanded form of st produced by transform(); this is what the trie inserts / scans.
char st2[MAX_LEN];

// vis[id] holds the trie node index where pattern id ends if that pattern is currently
// considered "found", or 0 otherwise (set in Trie::query, cleared in Trie::cal).
int vis[MAX_N];

// Per-node visited flags used by Trie::query and Trie::cal to cut fail-chain walks short;
// work() zeroes the whole array before each of those two passes.
bool check[MAX_NODE_NUM];



// Aho-Corasick automaton over the letters 'A'..'Z', stored in flat arrays indexed by node id.
// Relies on the file-level globals n, vis[] and check[].
struct Trie
{
    int next[MAX_NODE_NUM][MAX_CHILD_NUM]; // child / goto table; -1 means "no edge" until build() fills it
    int fail[MAX_NODE_NUM];                // failure link of each node
    int count[MAX_NODE_NUM];               // id of the pattern ending at the node, 0 if none
    int father[MAX_NODE_NUM];              // parent node in the trie (root is its own parent after build())
    int node_cnt;                          // number of nodes allocated so far
    int root;                              // id of the root node

    // Drops all nodes and allocates a fresh root.
    void init()
    {
        node_cnt = 0;
        root = newnode();
    }

    // Allocates the next node with no children and no pattern; returns its id.
    int newnode()
    {
        const int id = node_cnt++;
        for (int c = 0; c < MAX_CHILD_NUM; ++c)
            next[id][c] = -1;
        count[id] = 0;
        return id;
    }

    // Maps a letter to its edge index relative to 'A'.
    int get_id(char a)
    {
        return static_cast<int>(a) - 'A';
    }

    // Inserts the NUL-terminated string buf and tags its final node with pattern id `index`.
    // A later insert of the same string overwrites the earlier id.
    void insert(char buf[], int index)
    {
        int cur = root;
        for (const char *p = buf; *p != '\0'; ++p)
        {
            int &child = next[cur][get_id(*p)];
            if (child == -1)
            {
                child = newnode();
                father[child] = cur;
            }
            cur = child;
        }
        count[cur] = index;
    }

    // Computes failure links breadth-first and replaces every missing edge by the
    // corresponding edge of the failure node, so next[][] becomes a total transition table.
    void build()
    {
        std::queue<int> pending;
        fail[root] = root;
        father[root] = root;

        for (int c = 0; c < MAX_CHILD_NUM; ++c)
        {
            int &child = next[root][c];
            if (child != -1)
            {
                fail[child] = root;
                pending.push(child);
            }
            else
            {
                child = root;
            }
        }

        while (!pending.empty())
        {
            const int cur = pending.front();
            pending.pop();
            const int fallback = fail[cur];

            for (int c = 0; c < MAX_CHILD_NUM; ++c)
            {
                const int viaFail = next[fallback][c];
                int &child = next[cur][c];
                if (child != -1)
                {
                    fail[child] = viaFail;
                    pending.push(child);
                }
                else
                {
                    child = viaFail;
                }
            }
        }
    }

    // Runs buf through the automaton. Every node reached (directly or via failure links)
    // is flagged in check[]; for nodes that end a pattern, vis[pattern id] is set to the
    // node id. Always returns 0.
    int query(char buf[])
    {
        int state = root;
        for (const char *p = buf; *p; ++p)
        {
            state = next[state][get_id(*p)];
            // Stop as soon as an already-flagged node is met: its failure chain was walked before.
            for (int walk = state; walk != root && !check[walk]; walk = fail[walk])
            {
                if (count[walk] != 0)
                    vis[count[walk]] = walk;
                check[walk] = true;
            }
        }
        return 0;
    }

    // Dumps every node (id, failure link, pattern id, transition row) to stdout.
    void debug()
    {
        for (int node = 0; node < node_cnt; ++node)
        {
            printf("id = %3d,fail = %3d,end = %3d,chi = [", node, fail[node], count[node]);
            for (int c = 0; c < 26; ++c)
                printf("%2d", next[node][c]);
            printf("]\n");
        }
    }

    // For every pattern still marked in vis[], clears the marks of all other patterns that
    // are substrings of it: each prefix (walk up father[]) has its suffixes enumerated by
    // walking fail[]. check[] is set only on nodes whose pattern was cleared, and a flagged
    // node ends the failure walk.
    void cal()
    {
        for (int pat = 1; pat <= n; ++pat)
        {
            if (vis[pat] == 0)
                continue;

            for (int prefix = vis[pat]; prefix != root; prefix = father[prefix])
            {
                for (int suffix = prefix; suffix != root && !check[suffix]; suffix = fail[suffix])
                {
                    const int owner = count[suffix];
                    if (owner == 0 || owner == pat)
                        continue;
                    vis[owner] = 0;
                    check[suffix] = true;
                }
            }
        }
    }
};



// Global automaton instance used by input(), work() and main().
Trie ac;



// Expands the bracket-compressed string st into st2.
//
// An uppercase letter is copied as is. Any other character is taken to open a group of the
// form <open><decimal count><letter><close> (e.g. "[7K]"), which is expanded to <letter>
// repeated <count> times. st2 is always NUL-terminated on return.
//
// st  - NUL-terminated input.
// st2 - output buffer; the caller must make it large enough for the expanded text plus
//       the terminator (no size is passed, so this cannot be checked here).
//
// A group that is cut off by the end of the string is handled without reading past the
// terminator: a group missing its letter is dropped, a group missing only its closing
// character is still expanded.
void transform(char st[], char st2[])
{
    int len = 0;
    int i = 0;
    while (st[i] != '\0')
    {
        // <cctype> classifiers require a value representable as unsigned char.
        if (isupper(static_cast<unsigned char>(st[i])))
        {
            st2[len++] = st[i++];
            continue;
        }

        ++i; // step over the group opener

        int repeat = 0;
        while (isdigit(static_cast<unsigned char>(st[i])))
        {
            repeat = repeat * 10 + (st[i] - '0');
            ++i;
        }

        const char letter = st[i];
        if (letter == '\0')
            break; // input ended inside the group: nothing to repeat

        for (int j = 0; j < repeat; j++)
            st2[len + j] = letter;
        len += repeat;

        ++i; // step over the repeated letter
        if (st[i] != '\0')
            ++i; // step over the group closer, if present
    }
    st2[len] = '\0';
}



// Reads the pattern count n followed by n bracket-compressed patterns from stdin,
// expands each one and inserts it into the global automaton with ids 1..n.
//
// If the count cannot be read, n is set to 0. If the input ends before all patterns
// were read, n is reduced to the number of patterns actually inserted, so later passes
// never look at ids that were not stored.
void input()
{
    if (scanf("%d", &n) != 1)
    {
        n = 0;
        return;
    }

    for (int i = 1; i <= n; i++)
    {
        if (scanf("%s", st) != 1)
        {
            n = i - 1; // only patterns 1..i-1 made it into the trie
            break;
        }
        transform(st, st2);
        ac.insert(st2, i);
    }
}



// Scans the expanded text in st2 with the global automaton, drops every matched pattern
// that is a substring of another matched pattern, and returns how many patterns remain.
int work()
{
    // Pass 1: mark every pattern that occurs in the text.
    memset(vis, 0, sizeof(vis));
    memset(check, 0, sizeof(check));
    ac.query(st2);

    // Pass 2: check[] is reused with a fresh meaning, so wipe it before pruning.
    memset(check, 0, sizeof(check));
    ac.cal();

    int remaining = 0;
    for (int id = 1; id <= n; ++id)
    {
        if (!vis[id])
            continue;
        D(printf("#%d\n", vis[id]));
        ++remaining;
    }
    return remaining;
}



// Entry point: reads the number of test cases, then for each case reads the patterns,
// builds the automaton, reads and expands the text, and prints the result of work().
// Stops quietly if the input is missing or ends early.
int main()
{
    int t = 0;
    if (scanf("%d", &t) != 1)
        return 0;

    // "> 0" keeps a negative count from turning into a runaway loop.
    while (t-- > 0)
    {
        ac.init();
        input();
        ac.build();

        if (scanf("%s", st) != 1)
            break; // no text for this case: do not reuse the previous buffer contents

        transform(st, st2);
        printf("%d\n", work());
    }
    return 0;
}
View Code

 

你可能感兴趣的:(poj)