lightoj 1427 Substring Frequency (II) (AC自动机)

1427 - Substring Frequency (II)
PDF (English) Statistics Forum
Time Limit: 5 second(s) Memory Limit: 128 MB

A string is a finite sequence of symbols that are chosen from an alphabet. In this problem you are given a string T and n queries each with a string Pi, where the strings contain lower case English alphabets only. You have to find the number of times Pi occurs as a substring of T.

Input

Input starts with an integer T (≤ 10), denoting the number of test cases.

Each case starts with a line containing an integer n (1 ≤ n ≤ 500). The next line contains the string T (1 ≤ |T| ≤ 10^6). Each of the next n lines contains a string Pi (1 ≤ |Pi| ≤ 500).

Output

For each case, print the case number in a single line first. Then for each string Pi, report the number of times it occurs as a substring of T in a single line.

Sample Input

2
5
ababacbabc
aba
ba
ac
a
abc
3
lightoj
oj
light
lit

Output for Sample Input

Case 1:
2
3
1
4
1
Case 2:
1
1
0

Notes

1.      Dataset is huge, use faster I/O methods.

2.       If S is a string then |S| denotes the length of S.


题目链接:http://lightoj.com/volume_showproblem.php?problem=1427

题目大意:问单词在给定的段落中出现的次数

题目分析:AC自动机,注意单词可能重复,插单词的时候标记一下,把idx转到其第一次出现的坐标,本题动态new的话,不delete会mle,delete会超时

下面给出三种常见姿势和一个神奇姿势:

1.普通静态指针型 (3956 ms)

#include <cstdio>
#include <cstring>
// Capacity of the text buffer.
const int MAXN = 1000005;
// Capacity of the pattern buffer and of the per-pattern result arrays.
const int MAXM = 505;

char s[MAXM];    // pattern currently being read by main
char t[MAXN];    // text that is searched
int tail;        // write cursor of the BFS queue
int head;        // read cursor of the BFS queue
int sz;          // number of pool nodes handed out so far
int ans[MAXM];   // occurrence counters, indexed by pattern number
int cg[MAXM];    // for a repeated pattern: number of its first copy, otherwise -1

// One node of the pattern trie / automaton.
struct NODE
{
	int cnt;          // set to 1 by Insert when a pattern ends at this node, otherwise 0
	int id;           // pool slot number from Newnode; Insert replaces it with the pattern number
	int idx;          // only ever reset to 0 in this program
	NODE *fail;       // failure link, filled in by Build_AC
	NODE *next[26];   // child per lowercase letter
};

NODE *q[MAXM * MAXM];      // array-backed queue used by Build_AC
NODE *root;                // root of the trie
NODE *f;                   // set to the first pool slot by Init
NODE node[MAXM * MAXM];    // static node pool

/*
 * Hands out the next unused slot of the static pool `node` and resets it.
 *
 * x: receives the address of the fresh node.
 *
 * The node's id is set to its slot number, its counters to 0 and all of its
 * links to null. No capacity check is performed; the pool is assumed to be
 * large enough for the input.
 */
inline void Newnode(NODE * &x)
{
	x = &node[sz];
	x -> id = sz ++;
	x -> cnt = 0;
	x -> idx = 0;
	x -> fail = NULL;
	// Clear the child links with real pointer assignments. memset takes an
	// int fill value, so passing the null-pointer macro to it is a type
	// misuse that does not compile where NULL is defined as nullptr.
	for(int i = 0; i < 26; i++)
		x -> next[i] = NULL;
}

/*
 * Resets the automaton for a new test case: empties the node pool and the
 * BFS queue and creates a fresh root.
 *
 * Afterwards both `root` and `f` point at node[0] and sz == 1.
 */
inline void Init()
{
	sz = 0;
	head = tail = 0;
	// With sz == 0, Newnode hands out node[0] and resets every field of it,
	// so initialising that slot by hand beforehand is unnecessary.
	Newnode(root);
	f = root;
}

/*
 * Adds pattern `s` (lowercase letters, NUL-terminated) to the trie below `p`.
 *
 * id: number of the pattern.
 *
 * The first pattern ending at a node claims it (id stored, cnt set to 1).
 * A later identical pattern only records in cg[id] which pattern owns the node.
 */
inline void Insert(NODE *p, char *s, int id)
{
	for(char *ch = s; *ch != '\0'; ++ch)
	{
		NODE * &child = p -> next[*ch - 'a'];
		if(child == NULL)
			Newnode(child);
		p = child;
	}
	if(!p -> cnt)
	{
		p -> id = id;
		p -> cnt = 1;
		return;
	}
	cg[id] = p -> id;
}

/*
 * Computes the failure link of every node by a breadth-first walk from `root`,
 * using the global array `q` with cursors head/tail as the queue.
 *
 * Children of the root fail to the root. Any other child on letter c fails to
 * the c-child of the nearest node on its parent's failure chain that has one,
 * or to the root when no such node exists.
 */
inline void Build_AC(NODE *root)
{
	q[tail ++] = root;
	while(head != tail)
	{
		NODE *cur = q[head ++];
		for(int c = 0; c < 26; c++)
		{
			NODE *child = cur -> next[c];
			if(child == NULL)
				continue;
			// Default target; kept when cur is the root or the chain has no c-child.
			NODE *link = root;
			if(cur != root)
			{
				for(NODE *anc = cur -> fail; anc != NULL; anc = anc -> fail)
				{
					if(anc -> next[c] != NULL)
					{
						link = anc -> next[c];
						break;
					}
				}
			}
			child -> fail = link;
			q[tail ++] = child;
		}
	}
}

/*
 * Scans text `t` through the automaton rooted at `root` and adds every
 * pattern occurrence to the global `ans`, indexed by pattern number.
 *
 * After each character the failure chain of the current node is walked up to
 * the root, so every pattern ending at that position is counted.
 */
inline void Query(NODE *root, char *t)
{
	NODE *p = root;
	int len = strlen(t);
	for(int i = 0; i < len; i++)
	{
		int idx = t[i] - 'a';
		// Fall back along failure links until a node with an idx-child is found.
		while(p != root && p -> next[idx] == NULL)
			p = p -> fail;
		if(p -> next[idx] == NULL)
			p = root;
		else
			p = p -> next[idx];
		for(NODE *tmp = p; tmp != root; tmp = tmp -> fail)
		{
			// Only terminal nodes carry a pattern number in id. On other
			// nodes id is still the pool slot number, which can exceed the
			// size of ans, so they must not be used as an index.
			if(tmp -> cnt)
				ans[tmp -> id] += tmp -> cnt;
		}
	}
}

/*
 * Reads the test cases from stdin; for each one builds the automaton from the
 * n patterns, scans the text once and prints "Case k:" followed by one count
 * per pattern.
 *
 * Stops quietly if the input ends early or is malformed, instead of using
 * values that were never read. Input strings are assumed to respect the
 * problem's length limits; the %s reads are not width-bounded.
 */
int main()
{
	int T;
	if(scanf("%d", &T) != 1)
		return 0;
	for(int ca = 1; ca <= T; ca++)
	{
		memset(ans, 0, sizeof(ans));
		memset(cg, -1, sizeof(cg));
		Init();
		int n;
		if(scanf("%d", &n) != 1 || scanf("%s", t) != 1)
			return 0;
		for(int i = 0; i < n; i++)
		{
			if(scanf("%s", s) != 1)
				return 0;
			Insert(root, s, i);
		}
		Build_AC(root);
		Query(root, t);
		printf("Case %d:\n", ca);
		for(int i = 0; i < n; i++)
		{
			// A repeated pattern reports the counter of its first copy.
			if(cg[i] != -1)
				printf("%d\n", ans[cg[i]]);
			else
				printf("%d\n", ans[i]);
		}
	}
	return 0;
}


2.Trie图优化静态指针型 (4000ms) 。。反而更慢

#include <cstdio>
#include <cstring>
// Capacity of the text buffer.
const int MAXN = 1000005;
// Capacity of the pattern buffer and of the per-pattern result arrays.
const int MAXM = 505;

char s[MAXM];    // pattern currently being read by main
char t[MAXN];    // text that is searched
int tail;        // write cursor of the BFS queue
int head;        // read cursor of the BFS queue
int sz;          // number of pool nodes handed out so far
int ans[MAXM];   // occurrence counters, indexed by pattern number
int cg[MAXM];    // for a repeated pattern: number of its first copy, otherwise -1

// One node of the pattern trie / trie graph.
struct NODE
{
	int cnt;          // set to 1 by Insert when a pattern ends at this node, otherwise 0
	int id;           // pool slot number from Newnode; Insert replaces it with the pattern number
	int idx;          // only ever reset to 0 in this program
	NODE *fail;       // failure link, filled in by Build_AC
	NODE *next[26];   // transition per lowercase letter
};

NODE *q[MAXM * MAXM];      // array-backed queue used by Build_AC
NODE *root;                // root of the trie
NODE *f;                   // set to the first pool slot by Init
NODE node[MAXM * MAXM];    // static node pool

/*
 * Hands out the next unused slot of the static pool `node` and resets it.
 *
 * x: receives the address of the fresh node.
 *
 * The node's id is set to its slot number, its counters to 0 and all of its
 * links to null. No capacity check is performed; the pool is assumed to be
 * large enough for the input.
 */
inline void Newnode(NODE * &x)
{
	x = &node[sz];
	x -> id = sz ++;
	x -> cnt = 0;
	x -> idx = 0;
	x -> fail = NULL;
	// Clear the child links with real pointer assignments. memset takes an
	// int fill value, so passing the null-pointer macro to it is a type
	// misuse that does not compile where NULL is defined as nullptr.
	for(int i = 0; i < 26; i++)
		x -> next[i] = NULL;
}

/*
 * Resets the automaton for a new test case: empties the node pool and the
 * BFS queue and creates a fresh root.
 *
 * Afterwards both `root` and `f` point at node[0] and sz == 1.
 */
inline void Init()
{
	sz = 0;
	head = tail = 0;
	// With sz == 0, Newnode hands out node[0] and resets every field of it,
	// so initialising that slot by hand beforehand is unnecessary.
	Newnode(root);
	f = root;
}

/*
 * Adds pattern `s` (lowercase letters, NUL-terminated) to the trie below `p`.
 *
 * id: number of the pattern.
 *
 * The first pattern ending at a node claims it (id stored, cnt set to 1).
 * A later identical pattern only records in cg[id] which pattern owns the node.
 */
inline void Insert(NODE *p, char *s, int id)
{
	for(char *ch = s; *ch != '\0'; ++ch)
	{
		NODE * &child = p -> next[*ch - 'a'];
		if(child == NULL)
			Newnode(child);
		p = child;
	}
	if(!p -> cnt)
	{
		p -> id = id;
		p -> cnt = 1;
		return;
	}
	cg[id] = p -> id;
}

/*
 * Turns the trie into a trie graph by a breadth-first walk from `root`, using
 * the global array `q` with cursors head/tail as the queue.
 *
 * For each node and letter the "fallback" is the root itself when the node is
 * the root, otherwise the transition of the node's failure target on that
 * letter. An existing child gets the fallback as its failure link and is
 * queued; a missing child slot is overwritten with the fallback, so afterwards
 * every transition of every visited node is non-null.
 */
inline void Build_AC(NODE *root)
{
	q[tail ++] = root;
	while(head != tail)
	{
		NODE *cur = q[head ++];
		for(int c = 0; c < 26; c++)
		{
			NODE *fallback = (cur == root) ? root : cur -> fail -> next[c];
			NODE * &slot = cur -> next[c];
			if(slot == NULL)
			{
				slot = fallback;
				continue;
			}
			slot -> fail = fallback;
			q[tail ++] = slot;
		}
	}
}

/*
 * Scans text `t` through the automaton rooted at `root` and adds every
 * pattern occurrence to the global `ans`, indexed by pattern number.
 *
 * After each character the failure chain of the current node is walked up to
 * the root, so every pattern ending at that position is counted.
 */
inline void Query(NODE *root, char *t)
{
	NODE *p = root;
	int len = strlen(t);
	for(int i = 0; i < len; i++)
	{
		int idx = t[i] - 'a';
		// Build_AC in this program fills in every missing transition, so this
		// fallback loop and the null check below act only as a safeguard.
		while(p != root && p -> next[idx] == NULL)
			p = p -> fail;
		if(p -> next[idx] == NULL)
			p = root;
		else
			p = p -> next[idx];
		for(NODE *tmp = p; tmp != root; tmp = tmp -> fail)
		{
			// Only terminal nodes carry a pattern number in id. On other
			// nodes id is still the pool slot number, which can exceed the
			// size of ans, so they must not be used as an index.
			if(tmp -> cnt)
				ans[tmp -> id] += tmp -> cnt;
		}
	}
}

/*
 * Reads the test cases from stdin; for each one builds the trie graph from
 * the n patterns, scans the text once and prints "Case k:" followed by one
 * count per pattern.
 *
 * Stops quietly if the input ends early or is malformed, instead of using
 * values that were never read. Input strings are assumed to respect the
 * problem's length limits; the %s reads are not width-bounded.
 */
int main()
{
	int T;
	if(scanf("%d", &T) != 1)
		return 0;
	for(int ca = 1; ca <= T; ca++)
	{
		memset(ans, 0, sizeof(ans));
		memset(cg, -1, sizeof(cg));
		Init();
		int n;
		if(scanf("%d", &n) != 1 || scanf("%s", t) != 1)
			return 0;
		for(int i = 0; i < n; i++)
		{
			if(scanf("%s", s) != 1)
				return 0;
			Insert(root, s, i);
		}
		Build_AC(root);
		Query(root, t);
		printf("Case %d:\n", ca);
		for(int i = 0; i < n; i++)
		{
			// A repeated pattern reports the counter of its first copy.
			if(cg[i] != -1)
				printf("%d\n", ans[cg[i]]);
			else
				printf("%d\n", ans[i]);
		}
	}
	return 0;
}


3.静态数组Trie图优化 (2668 ms)

#include <cstdio>
#include <cstring>
#include <queue>
#include <algorithm>
using namespace std;
// Capacity of the text buffer.
const int MAXN = 1000005;
// Upper bound on the number of automaton states; also sizes the arrays below.
const int MAXM = 505 * 505;

char t[MAXN];    // text that is searched
char s[MAXM];    // pattern currently being read by main
int n;           // number of patterns in the current test case
int ans[MAXM];   // occurrence counters, indexed by pattern number
int cg[MAXM];    // for a repeated pattern: number of its first copy, otherwise -1

struct AC
{
	int next[MAXM][26], fail[MAXM], end[MAXM], id[MAXM];
	int root, tot;
	inline int Newnode()
	{
		for(int i = 0; i < 26; i++)
			next[tot][i] = -1;
		end[tot] = 0;
		fail[tot] = 0;
		return tot ++;
	}

	inline void Init()
	{
		tot = 0;
		root = Newnode();
	}

	inline void Insert(char *s, int idx)
	{
		int len = strlen(s);
		int now = root;
		for(int i = 0; i < len; i++)
		{
			int idx = s[i] - 'a';
			if(next[now][idx] == -1)
				next[now][idx] = Newnode();
			now = next[now][idx];
		}
		if(end[now])
			cg[idx] = id[now];
		else
		{
			end[now] = 1;
			id[now] = idx;
		}
	}

	inline void Build()
	{
		queue <int> q;
		q.push(root);
		while(!q.empty())
		{
			int now = q.front();
			q.pop();
			for(int i = 0; i < 26; i++)
			{
				if(next[now][i] == -1)
				{
					if(now == root)
						next[now][i] = root;
					else
						next[now][i] = next[fail[now]][i];
				}
				else
				{
					if(now == root)
						fail[next[now][i]] = root;
					else
						fail[next[now][i]] = next[fail[now]][i];
					q.push(next[now][i]);
				}
			}
		}
	}

	inline void Query(char *s)
	{
		int now = root;
		int len = strlen(s);
		for(int i = 0; i < len; i++)
		{
			int idx = s[i] - 'a';
			while(now != root && next[now][idx] == -1)
				now = fail[now];
			now = next[now][idx];
			int tmp = now;
			while(tmp != root)
			{
				ans[id[tmp]] += end[tmp];
				tmp = fail[tmp];
			}
		}
	}
}ac;

/*
 * Reads the test cases from stdin; for each one builds the automaton from the
 * n patterns, scans the text once and prints "Case k:" followed by one count
 * per pattern.
 *
 * Stops quietly if the input ends early or is malformed, instead of using
 * values that were never read. Input strings are assumed to respect the
 * problem's length limits; the %s reads are not width-bounded.
 */
int main()
{
	int T;
	if(scanf("%d", &T) != 1)
		return 0;
	for(int ca = 1; ca <= T; ca++)
	{
		memset(cg, -1, sizeof(cg));
		memset(ans, 0, sizeof(ans));
		ac.Init();
		if(scanf("%d", &n) != 1 || scanf("%s", t) != 1)
			return 0;
		for(int i = 0; i < n; i++)
		{
			if(scanf("%s", s) != 1)
				return 0;
			ac.Insert(s, i);
		}
		ac.Build();
		ac.Query(t);
		printf("Case %d:\n", ca);
		for(int i = 0; i < n; i++)
		{
			// A repeated pattern reports the counter of its first copy.
			if(cg[i] != -1)
				printf("%d\n", ans[cg[i]]);
			else
				printf("%d\n", ans[i]);
		}
	}
	return 0;
}



4.静态数组Trie图优化,并用一个数组记录所有fail的点,累计的时候只看母串上的和有fail的点,注意fail的值是从深的地方往浅的地方回溯的 (700ms)

#include <cstdio>
#include <cstring>
#include <queue>
#include <algorithm>
using namespace std;
// Capacity of the text buffer.
const int MAXN = 1000005;
// Upper bound on the number of automaton states; also sizes the arrays below.
const int MAXM = 505 * 505;

char t[MAXN];    // text that is searched
char s[MAXM];    // pattern currently being read by main
int ans[MAXM];   // per-state visit counters, later summed along failure links
int pos[MAXM];   // pos[i] = state in which pattern number i ends
int n;           // number of patterns in the current test case

// Array-based trie graph that counts state visits during the scan and then
// pushes the counts along failure links in a single pass.
struct AC
{
	int next[MAXM][26];   // transition table; -1 marks a missing child until Build runs
	int fail[MAXM];       // failure link per state
	int st[MAXM];         // non-root states in BFS order, stored at indices 1..top
	int root;             // index of the root state
	int tot;              // number of states allocated so far
	int top;              // number of entries currently held in st

	// Allocates a state with no children, zeroes its counter in the global
	// `ans` and its failure link, and returns its index.
	inline int Newnode()
	{
		const int fresh = tot ++;
		for(int c = 0; c < 26; c++)
			next[fresh][c] = -1;
		ans[fresh] = 0;
		fail[fresh] = 0;
		return fresh;
	}

	// Drops all states and the BFS order, then creates a new root.
	inline void Init()
	{
		top = 0;
		tot = 0;
		root = Newnode();
	}

	// Adds pattern `s` (lowercase letters) and records in pos[id] the state
	// in which it ends; identical patterns end in the same state.
	inline void Insert(char *s, int id)
	{
		int cur = root;
		for(char *ch = s; *ch != '\0'; ++ch)
		{
			int &child = next[cur][*ch - 'a'];
			if(child == -1)
				child = Newnode();
			cur = child;
		}
		pos[id] = cur;
	}

	// Breadth-first pass that sets every failure link, replaces each missing
	// transition by the transition of the failure target (or by the root, for
	// the root itself) and appends every real child to st.
	inline void Build()
	{
		std::queue <int> pending;
		pending.push(root);
		while(!pending.empty())
		{
			const int cur = pending.front();
			pending.pop();
			for(int c = 0; c < 26; c++)
			{
				const int fallback = (cur == root) ? root : next[fail[cur]][c];
				int &slot = next[cur][c];
				if(slot == -1)
				{
					slot = fallback;
					continue;
				}
				fail[slot] = fallback;
				pending.push(slot);
				st[++ top] = slot;
			}
		}
	}

	// Scans `s`, counting how often each state is reached, then adds each
	// state's count to its failure target. st is consumed from the back, i.e.
	// in reverse BFS order, and top is 0 afterwards.
	inline void Query(char *s)
	{
		int cur = root;
		for(char *ch = s; *ch != '\0'; ++ch)
		{
			cur = next[cur][*ch - 'a'];
			++ ans[cur];
		}
		for(; top; -- top)
		{
			const int state = st[top];
			ans[fail[state]] += ans[state];
		}
	}
};

AC ac;

/*
 * Reads the test cases from stdin; for each one builds the trie graph from
 * the n patterns (numbered 1..n), scans the text once and prints "Case k:"
 * followed by the counter of each pattern's end state.
 *
 * Stops quietly if the input ends early or is malformed, instead of using
 * values that were never read. Input strings are assumed to respect the
 * problem's length limits; the %s reads are not width-bounded.
 */
int main()
{
	int T;
	if(scanf("%d", &T) != 1)
		return 0;
	for(int ca = 1; ca <= T; ca++)
	{
		ac.Init();
		if(scanf("%d", &n) != 1 || scanf("%s", t) != 1)
			return 0;
		for(int i = 1; i <= n; i++)
		{
			if(scanf("%s", s) != 1)
				return 0;
			ac.Insert(s, i);
		}
		ac.Build();
		ac.Query(t);
		printf("Case %d:\n", ca);
		for(int i = 1; i <= n; i++)
			printf("%d\n", ans[pos[i]]);
	}
	return 0;
}



你可能感兴趣的:(AC自动机,lightoj)