hdu4622(后缀数组+ST算法)

Reincarnation

Time Limit: 6000/3000 MS (Java/Others)    Memory Limit: 131072/65536 K (Java/Others)
Total Submission(s): 1305    Accepted Submission(s): 448


Problem Description
Now you are back, and have a task to do:
Given a string s consisting of lower-case English letters only, denote f(s) as the number of distinct substrings of s.
You also have some queries; for each one you should calculate f(s[l...r]), where s[l...r] means the substring of s that starts at l and ends at r.
 

Input
The first line contains an integer T (1<=T<=5), denoting the number of test cases.
For each test case, the first line contains a string s (1 <= length of s <= 2000).
Denote the length of s by n.
The second line contains an integer Q (1 <= Q <= 10000), denoting the number of queries.
Then Q lines follow; each line contains two integers l, r (1 <= l <= r <= n), denoting a query.
 

Output
For each test case, for each query, print the answer on one line.
 

Sample Input
   
   
   
   
2
bbaba
5
3 4
2 2
2 5
2 4
1 4
baaba
5
3 3
3 4
1 4
3 5
5 5
 

Sample Output
   
   
   
   
3
1
7
5
8
1
3
8
5
1
Hint
I won't do anything against hash because I am nice. Of course this problem has a solution that doesn't rely on hash.
 

Source
2013 Multi-University Training Contest 3
 

Recommend
zhuyuanchen520
          本题要求给定字符串区间内不同子串的个数。由于看了那篇大牛的论文,知道这是后缀数组的题目。
       首先求后缀数组sa[]和height[]。由于本题所查询的区间只是原字符串的一部分,这给题目带来了难度。我们首先看最简单的模型:每增加一个后缀,最多可增加len-sa[i]+1个前缀(这里sa[i]按从1开始的下标计;代码中下标从0开始,对应为len-sa[i]个)。如果i从1到len依次进行,最终即可得到原串中所有的子串,但是需要去重:每添加一个排名为i的后缀,其对应的前缀最多增加len-sa[i]+1个,其中有height[i]个是已经存在的(height[i]即它与排名在它前面一位的后缀的最长公共前缀)。而本题要做的是给定区间的查询问题。想了几乎一天,也没能想到更好的方法,于是决定枚举排名:假设查询区间是[left,right],每遇到一个起始下标位于该区间内的后缀,就求它与排在它前面、同样位于该区间内的上一个后缀的最长公共前缀的长度(任意两个后缀的最长公共前缀可以根据求得的height数组用ST表(稀疏表)预处理后查询),然后注意公共前缀有没有超出该区间即可。
#include<iostream>
#include<cstdio>
#include<cstring>
#include<algorithm>
#include<cmath>
using namespace std;

//*****************************************************************
// Capacity of every per-position array: the 2000-character limit from the
// problem statement plus some slack.
const int MAXN= 2000+100;
const int Mpow=12;// only needs to satisfy 2^Mpow > MAXN
char str[MAXN];// string being processed
int sa[MAXN];// the computed suffix array
// Scratch arrays for da(): wa/wb hold the rank keys (used through the x/y
// pointers), wv holds keys in second-key order, wh is the counting-sort bucket.
int wa[MAXN],wb[MAXN],wv[MAXN],wh[MAXN];
// Returns 1 when the key pairs (r[a], r[a+l]) and (r[b], r[b+l]) are equal,
// 0 otherwise. The second keys are only read when the first keys match.
int cmp(int *r,int a,int b,int l)
{
	if(r[a]!=r[b])
		return 0;
	return r[a+l]==r[b+l]?1:0;
}
// Computes the suffix array sa[]; indices 1..n-1 hold the real suffixes
// (here n = strlen(str)+1, i.e. the terminating '\0' is sorted as well).
// The n suffixes of str are sorted in increasing order and their start
// positions are stored into sa in that order, guaranteeing
// Suffix(sa[i]) < Suffix(sa[i+1]) for 1<=i<n;
// sa[0] holds the artificially appended smallest suffix at the end.
// Time complexity of the doubling algorithm: O(n log n).
// Space complexity of the doubling algorithm: O(n).
//
// r  - input characters (n entries are read)
// sa - output array receiving the sorted start positions
// n  - number of positions to sort
// m  - exclusive upper bound on the initial key values (character codes);
//      it is the number of counting-sort buckets cleared and accumulated
void da(char *r,int *sa,int n,int m)
{
	int i,j,p,*x=wa,*y=wb,*t;
	// Initial pass: counting sort of all positions by their single character;
	// x[i] becomes the key of position i.
	for(i=0;i<m;i++) wh[i]=0;
	for(i=0;i<n;i++) wh[x[i]=r[i]]++;
	for(i=1;i<m;i++) wh[i]+=wh[i-1];
	for(i=n-1;i>=0;i--) sa[--wh[x[i]]]=i;
	// j is the current half-length and doubles every round. p counts the
	// distinct keys produced by a round; it becomes the next bucket count m,
	// and the loop stops once all n positions have distinct keys.
	for(j=1,p=1;p<n;j*=2,m=p)
	{
		// y = positions ordered by their second key: the last j positions
		// (whose second half lies past the end) come first, followed by
		// sa[i]-j in the current sa order.
		for(p=0,i=n-j;i<n;i++) y[p++]=i;
		for(i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
		// wv[i] = first key of the i-th position in y.
		for(i=0;i<n;i++) wv[i]=x[y[i]];
		// Counting sort by first key; filling the buckets from the back keeps
		// the second-key order among positions with equal first keys.
		for(i=0;i<m;i++) wh[i]=0;
		for(i=0;i<n;i++) wh[wv[i]]++;
		for(i=1;i<m;i++) wh[i]+=wh[i-1];
		for(i=n-1;i>=0;i--) sa[--wh[wv[i]]]=y[i];
		// Swap x and y (y now holds the old keys), then assign new keys in sa
		// order: neighbours whose (first, second) key pairs are equal share a key.
		for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++)
			x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
	}
	return;
}

int Rank[MAXN],height[MAXN];
// height[i] is defined as the longest common prefix of suffix(sa[i-1]) and
// suffix(sa[i]), i.e. of two suffixes that are adjacent in rank.
// For any two suffixes starting at i and j (assuming Rank[i]<Rank[j]) the
// longest common prefix is the minimum of
// height[Rank[i]+1], height[Rank[i]+2], ..., height[Rank[j]].
//
// r  - the characters the suffix array was built from
// sa - suffix array; entries 1..n are read
// n  - number of real suffixes (the string length)
// Fills Rank[] (inverse of sa) and height[].
void calheight(char *r,int *sa,int n)
{
	for(int pos=1;pos<=n;++pos)
		Rank[sa[pos]]=pos;

	// Suffixes are visited in text order, so the match length carried over
	// from the previous start position can drop by at most one.
	int matched=0;
	for(int start=0;start<n;++start)
	{
		if(matched>0)
			--matched;
		const int neighbour=sa[Rank[start]-1];
		while(r[start+matched]==r[neighbour+matched])
			++matched;
		height[Rank[start]]=matched;
	}
}

//*****************************************************************
// Sparse table over height[]: Mindp[i][j] is the minimum of
// height[i .. i+2^j-1]; filled by init() and Rmp_ST().
int Mindp[MAXN][Mpow];
// Returns the smaller of a and b (b when they are equal).
inline int Min(int a,int b)
{
	if(a<b)
		return a;
	return b;
}

// Fills level 0 of the sparse table for indices 1..n: a range of length one
// starting at i has minimum height[i].
inline void init(int n)
{
	int i=1;
	while(i<=n)
	{
		Mindp[i][0]=height[i];
		++i;
	}
}

//预处理过程,时间复杂度为O(N*log(N))
//ST算法求区间最值
//dp[i][j]表示区间[i,i+2^j-1]最值
//求dp[i][j]时将其分成dp[i][j-1],dp[i+2^(j-1)][j-1]
//[i,i+2^j-1]=[i,i+2^(j-1)-1]+[i+2^(j-1),i+2^(j-1)+2^(j-1)-1]
inline void Rmp_ST(int n)
{
	int l,s;
	init(n);
	for(l=1;l<=16;l++)
	{
		for(s=1;s<=n;s++)
		{
			if(s+(1<<l)-1<=n)
			{
				Mindp[s][l]=Min(Mindp[s][l-1],Mindp[s+(1<<(l-1))][l-1]);
			}
		}
	}
}

// Returns the minimum of height[s..e] using the table built by Rmp_ST().
// Callers are expected to pass s<=e within the range that was preprocessed.
inline int Rmp_ST_query(int s,int e)
{
	// k = floor(log2(e-s+1)), computed with integers so the result does not
	// depend on floating-point rounding of log().
	const int span=e-s+1;
	int k=0;
	while((1<<(k+1))<=span)
		k++;
	// Two blocks of length 2^k, anchored at both ends, cover [s,e].
	return Min(Mindp[s][k],Mindp[e-(1<<k)+1][k]);
}

int len;// length of the whole string
// Answers one query on str[left..right] (0-based, inclusive bounds).
// Suffixes are visited in rank order; every suffix that starts inside the
// window contributes its prefixes truncated at `right`, minus those it shares
// with the previously visited in-window suffix.
//
// left, right - window bounds, 0-based and inclusive
// Returns the accumulated count of new prefixes.
int Query(int left,int right)
{
	int ans=0;
	int pre=-1;// start position of the previous in-window suffix, -1 if none yet
	int op=0;// number of in-window suffixes handled so far
	const int most=right-left+1;// the window contains exactly this many suffix starts
	for(int i=1;i<=len&&op<most;i++)
	{
		const int cur=sa[i];
		if(cur<left||cur>right)
			continue;
		const int tot=right-cur+1;// at most this many prefixes fit in the window
		if(-1==pre)
		{
			ans+=tot;
		}
		else
		{
			// LCP of the two full suffixes, clamped to the truncated length of
			// each. Clamping covers all four cases uniformly, including the
			// one where the LCP runs past `right` from BOTH starts (e.g. "aaaa"
			// with a window of two characters), which the earlier if/else-if
			// chain left unassigned.
			const int preLen=right-pre+1;
			int common=Rmp_ST_query(Rank[pre]+1,i);
			if(common>preLen)
				common=preLen;
			if(common>tot)
				common=tot;
			ans+=tot-common;
		}
		pre=cur;
		op++;
	}
	return ans;
}

// Reads the test cases, builds the suffix array, height array and sparse
// table for each string, then prints the answer of every query on its own line.
int main()
{
	int cas,q,left,right;
	if(scanf("%d",&cas)!=1)
		return 0;
	while(cas--)
	{
		// str has room for MAXN (2100) chars; the field width keeps an
		// over-long token from writing past the end of the buffer.
		if(scanf("%2099s",str)!=1)
			return 0;
		if(scanf("%d",&q)!=1)
			return 0;
		len=static_cast<int>(strlen(str));
		// len+1 includes the terminating '\0' as the smallest suffix; 200 is the
		// exclusive bound on character codes handed to da() as bucket count.
		da(str,sa,len+1,200);
		calheight(str,sa,len);
		Rmp_ST(len);
		while(q--)
		{
			if(scanf("%d%d",&left,&right)!=2)
				return 0;
			// Queries are 1-based in the input; Query() works on 0-based bounds.
			left--;right--;
			printf("%d\n",Query(left,right));
		}
	}
	return 0;
}

你可能感兴趣的:(数据结构,后缀数组)