Boring counting
Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 65536/32768 K (Java/Others)
Total Submission(s): 1229 Accepted Submission(s): 487
Problem Description
035 now faces a tough problem: his English teacher gives him a string consisting of n lowercase letters, and he must figure out how many distinct substrings appear at least twice in it such that the appearances do not overlap each other.
Take aaaa as an example. "a" appears four times, and "aa" appears two times without overlapping. However, "aaa" can't appear more than once without overlapping, since we can only get "aaa" from [0-2] (string positions begin at 0) and [1-3], and the intervals [0-2] and [1-3] overlap each other. So "aaa" cannot be taken into account. Therefore, the answer is 2 ("a" and "aa").
Input
The input data consists of several test cases. The input ends with a line "#". Each test case contains a string consisting of lowercase letters; the length n won't exceed 1000 (n <= 1000).
Output
For each test case, output an integer ans, which represents the answer for the test case. You'd better use int64 to avoid unnecessary trouble.
Sample Input
Sample Output
Source
2010 ACM-ICPC Multi-University Training Contest(9)——Host by HNU
Recommend
zhengfen
本题要求统计原串中至少出现两次、且出现位置互不重叠的不同子串的个数!
本题可以想到一定得用后缀数组做。这样的子串必定是某个后缀的前缀。出现至少两次,即是说该串必须至少是某两个后缀的公共前缀;还有不重叠,这个条件可以转化为拥有该公共前缀的后缀中,最大起始位置与最小起始位置之差不小于该子串长度。于是问题基本转化为后缀数组的基本模型了!接下来从1到len/2枚举子串的长度(更长的子串不可能不重叠地出现两次),借助height[]数组分段(同一段内相邻后缀的height值都不小于当前长度),若某段满足条件,ans++;这样即可求得答案。后缀数组真心有用!
此题总的时间复杂度为O(N*N),N<=1000,依经验不会超时!
#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;
//*****************************************************************
// Capacity of every working buffer: the 1000-character limit plus 100 spare slots.
const int MAXN = 1100;

// Text being processed (NUL-terminated).
char str[MAXN];

// Suffix array filled in by da().
int sa[MAXN];

// Scratch buffers used by da(): two rank/key arrays, the per-pass key list
// and the counting-sort histogram.
int wa[MAXN];
int wb[MAXN];
int wv[MAXN];
int wh[MAXN];

// Rank[i] is the position of suffix i inside sa[]; height[] is filled in by
// calheight().
int Rank[MAXN];
int height[MAXN];
// Returns 1 when positions a and b carry the same key pair in r, i.e.
// r[a]==r[b] and r[a+l]==r[b+l]; returns 0 otherwise.
// The second pair is only read when the first pair matches.
int cmp(int *r,int a,int b,int l)
{
    if (r[a] != r[b])
    {
        return 0;
    }
    return (r[a + l] == r[b + l]) ? 1 : 0;
}
// Builds the suffix array sa[] of r with the prefix-doubling algorithm.
//   r  : input text. The caller passes n = strlen(r)+1, so the terminating
//        '\0' takes part in the sort as an artificially appended character.
//   sa : output. The n suffixes are sorted in increasing order and their start
//        positions are stored in sa[] in that order. sa[0] holds the
//        artificial suffix appended at the end (the smallest one); the real
//        suffixes occupy sa[1..n-1], with Suffix(sa[i]) < Suffix(sa[i+1])
//        for 1 <= i < n.
//   n  : number of characters sorted, terminator included.
//   m  : number of counting-sort buckets for the first pass; every character
//        code in r is used as an index into wh[] and must be below m.
// Uses the file-level scratch arrays wa, wb, wv and wh.
// The doubling algorithm runs in O(n log n) time and O(n) space.
void da(char *r,int *sa,int n,int m)
{
// x holds the current rank key of every position, y is the spare buffer.
int i,j,p,*x=wa,*y=wb,*t;
// First pass: counting sort of all positions by their single character.
for(i=0;i<m;i++) wh[i]=0;
for(i=0;i<n;i++) wh[x[i]=r[i]]++;
for(i=1;i<m;i++) wh[i]+=wh[i-1];
for(i=n-1;i>=0;i--) sa[--wh[x[i]]]=i;
// Each iteration doubles the compared prefix length (j -> 2*j). p counts the
// distinct ranks produced; the loop ends once all n suffixes have distinct
// ranks. m is reset to p because ranks are now the sort keys.
for(j=1,p=1;p<n;j*=2,m=p)
{
// Fill y[] with positions ordered by their second key (the key j positions
// later). The last j positions have no second half, so they go first.
for(p=0,i=n-j;i<n;i++) y[p++]=i;
// The remaining positions follow in the order of the suffix j characters
// behind them, which the current sa[] already provides.
for(i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
// Counting sort of y[] by the first key; filling from the back keeps the
// second-key order among equal first keys.
for(i=0;i<n;i++) wv[i]=x[y[i]];
for(i=0;i<m;i++) wh[i]=0;
for(i=0;i<n;i++) wh[wv[i]]++;
for(i=1;i<m;i++) wh[i]+=wh[i-1];
for(i=n-1;i>=0;i--) sa[--wh[wv[i]]]=y[i];
// Swap x and y so that y holds the old keys, then write the new ranks into
// x: neighbours in sa[] with equal (first, second) key pairs share a rank.
for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
}
return;
}
// height[i] is defined as the longest common prefix of suffix(sa[i-1]) and
// suffix(sa[i]), i.e. of two suffixes that are adjacent in sorted order.
// For any two suffixes starting at i and j (with Rank[i] < Rank[j]) the
// longest common prefix is the minimum of
// height[Rank[i]+1], height[Rank[i]+2], ..., height[Rank[j]].
//
// Fills the file-level Rank[] (for sa[1..n]) and height[] (for ranks 1..n).
//   r  : the text the suffix array was built from.
//   sa : suffix array with n+1 entries, sa[0] being the artificial suffix.
//   n  : length of the text, terminator excluded.
// The character comparison has no explicit bound and stops at the first
// mismatch; presumably the caller's '\0' terminator guarantees one.
void calheight(char *r,int *sa,int n)
{
    // Invert the suffix array: Rank[start] = position of that suffix in sa[].
    for (int pos = 1; pos <= n; ++pos)
    {
        Rank[sa[pos]] = pos;
    }

    // Walk the suffixes in text order; the match length carried over from the
    // previous suffix drops by at most one, so it is reused instead of
    // restarting from zero.
    int matched = 0;
    for (int start = 0; start < n; ++start)
    {
        if (matched != 0)
        {
            --matched;
        }
        const int neighbour = sa[Rank[start] - 1];
        while (r[start + matched] == r[neighbour + matched])
        {
            ++matched;
        }
        height[Rank[start]] = matched;
    }
}
// Returns the smallest of the three arguments.
// Written with plain comparisons so it does not depend on std::min, whose
// header <algorithm> this file never includes.
int Min(int a1,int a2,int a3)
{
    int smallest = a1;
    if (a2 < smallest) smallest = a2;
    if (a3 < smallest) smallest = a3;
    return smallest;
}
// Returns the largest of the three arguments.
// Written with plain comparisons so it does not depend on std::max, whose
// header <algorithm> this file never includes.
int Max(int a1,int a2,int a3)
{
    int largest = a1;
    if (a2 > largest) largest = a2;
    if (a3 > largest) largest = a3;
    return largest;
}
int main()
{
int len,i,j;
__int64 ans;
while(~scanf("%s",str))
{
if(strcmp(str,"#")==0)break;
len=strlen(str);
da(str,sa,len+1,200);
calheight(str,sa,len);
int Maxid,Minid;
ans=0;
for(i=1;i<=len/2;i++)
{
Maxid=-1,Minid=10000;
for(j=1;j<=len;j++)
{
if(height[j]>=i)
{
Minid=Min(Minid,sa[j-1],sa[j]);
Maxid=Max(Maxid,sa[j-1],sa[j]);
}
else
{
if(Maxid-Minid>=i)ans++;
Maxid=-1,Minid=10000;
}
}
if(Maxid-Minid>=i)ans++;
}
printf("%I64d\n",ans);
}
return 0;
}