poj3261(后缀数组+二分)

Milk Patterns
Time Limit: 5000MS   Memory Limit: 65536K
Total Submissions: 8133   Accepted: 3712
Case Time Limit: 2000MS

Description

Farmer John has noticed that the quality of milk given by his cows varies from day to day. On further investigation, he discovered that although he can't predict the quality of milk from one day to the next, there are some regular patterns in the daily milk quality.

To perform a rigorous study, he has invented a complex classification scheme by which each milk sample is recorded as an integer between 0 and 1,000,000 inclusive, and has recorded data from a single cow over N (1 ≤ N ≤ 20,000) days. He wishes to find the longest pattern of samples which repeats identically at least K (2 ≤ K ≤ N) times. This may include overlapping patterns -- 1 2 3 2 3 2 3 1 repeats 2 3 2 3 twice, for example.

Help Farmer John by finding the longest repeating subsequence in the sequence of samples. It is guaranteed that at least one subsequence is repeated at least K times.

Input

Line 1: Two space-separated integers: N and K
Lines 2.. N+1: N integers, one per line, the quality of the milk on day i appears on the ith line.

Output

Line 1: One integer, the length of the longest pattern which occurs at least K times

Sample Input

8 2
1
2
3
2
3
2
3
1

Sample Output

4

Source

USACO 2006 December Gold
本题要求至少出现k次的可重叠最长重复子串的长度。
         此题是个典型的后缀数组题目。首先建立后缀数组,求出sa[],rank[],height[],这些都是后缀数组的基本操作,套用模板即可。然后二分答案长度midlen:按照height[]的值是否小于midlen把排好序的后缀分段,统计每一段内后缀的个数。若存在某一段的后缀个数不少于K,说明长度为midlen的子串至少出现了K次,则令下界minlen=midlen+1,即增加子串的长度重新判定;若不满足条件,则说明该选定的长度过长,需缩短,于是令上界maxlen=midlen-1,然后重新判定。此过程的时间复杂度为O(N*Log(N))。
 
#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;

//*****************************************************************
// Capacity of every global work array below. wh[] is indexed by symbol value
// inside da(), so MAXN bounds the symbol values as well as the sequence length.
const int MAXN=1000000+100;
int str[MAXN];// sequence to process
int sa[MAXN];// resulting suffix array
// Scratch buffers of da(): wa/wb are the two alternating key arrays (x and y),
// wv holds the sort key of each entry in the current round, wh the counting-sort buckets.
int wa[MAXN],wb[MAXN],wv[MAXN],wh[MAXN];
// Returns 1 when positions a and b carry the same pair of keys
// (r[p], r[p+l]), 0 otherwise. The second key is only read when the
// first keys already match.
int cmp(int *r,int a,int b,int l)
{
	if(r[a]!=r[b])
		return 0;
	return (r[a+l]==r[b+l])?1:0;
}
//求后缀数组sa[],下标1到n-1(此处n=strlen(str)+1)有效后缀
//将str的n个后缀从小到大进行排序之后把排好序的后缀的开头位置顺次放入sa中。
//保证Suffix(sa[i])<Suffix(sa[i+1])
//1<=i<n,sa[0]存放人为添加在末尾的那个最小的后缀
//倍增算法的时间复杂度为O(nlogn)
//倍增算法的空间复杂度都是O(n)
void da(int *r,int *sa,int n,int m)
{
	int i,j,p,*x=wa,*y=wb,*t;
	for(i=0;i<m;i++) wh[i]=0;
	for(i=0;i<n;i++) wh[x[i]=r[i]]++;
	for(i=1;i<m;i++) wh[i]+=wh[i-1];
	for(i=n-1;i>=0;i--) sa[--wh[x[i]]]=i;
	for(j=1,p=1;p<n;j*=2,m=p)
	{
		for(p=0,i=n-j;i<n;i++) y[p++]=i;
		for(i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
		for(i=0;i<n;i++) wv[i]=x[y[i]];
		for(i=0;i<m;i++) wh[i]=0;
		for(i=0;i<n;i++) wh[wv[i]]++;
		for(i=1;i<m;i++) wh[i]+=wh[i-1];
		for(i=n-1;i>=0;i--) sa[--wh[wv[i]]]=y[i];
		for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++)
			x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
	}
	return;
}

int rank[MAXN],height[MAXN];
//定义height[i]=suffix(sa[i-1])和suffix(sa[i])的最长公
//共前缀,也就是排名相邻的两个后缀的最长公共前缀
//任意两个起始位置为i,j(假设rank[i]<rank[j])的后缀的最长公共前缀
//为height[rank[i]+1]、height[rank[i]+2]…height[rank[j]]的最小值
void calheight(int *r,int *sa,int n)
{
	int i,j,k=0;
	for(i=1;i<=n;i++) rank[sa[i]]=i;
	for(i=0;i<n;height[rank[i++]]=k)
		for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);
		return;
}
// Returns the larger of the two arguments.
int Max(int a,int b)
{
	if(b>a)
		return b;
	return a;
}

// Binary-searches the largest length for which some substring occurs at
// least tim times, using height[1..num].
// For a candidate length, consecutive entries with height >= length form a
// group of suffixes sharing a prefix of that length; a group of g such
// entries stands for g+1 suffixes.
// Returns the final upper bound of the search, which is the answer.
int Solve(int num,int tim)
{
	int lo=0;
	int hi=num;
	while(lo<=hi)
	{
		const int len=(lo+hi)/2;
		int run=1;	// suffixes in the group currently being scanned
		for(int idx=1;idx<=num;++idx)
		{
			run=(height[idx]>=len)?run+1:1;
			if(run>=tim)
				break;
		}
		if(run<tim)
			hi=len-1;	// too long: no group is big enough
		else
			lo=len+1;	// feasible: try a longer pattern
	}
	return hi;
}

// Reads test cases until the input ends. Each case is "N K" followed by N
// sample values; prints the length of the longest pattern that occurs at
// least K times. Processing stops at the first malformed or out-of-range
// input instead of running on garbage.
int main()
{
	int num,tim,i;
	//freopen("in.txt","r",stdin);
	// Both header fields must be read. Testing only for EOF (~scanf) would
	// loop forever on a non-numeric token and use an uninitialised K when
	// just one field could be read.
	while(scanf("%d%d",&num,&tim)==2)
	{
		// Index num is used for the terminator, so num must stay below MAXN.
		if(num<0||num>=MAXN) return 0;

		int alphabet=1;	// exclusive bound on symbol values; covers the 0 terminator
		for(i=0;i<num;i++)
		{
			if(scanf("%d",&str[i])!=1) return 0;
			// After the +1 shift the value indexes the bucket array of da().
			if(str[i]<0||str[i]>MAXN-2) return 0;
			str[i]++;// shift by one so the 0 appended at the end is the unique minimum
			if(str[i]>=alphabet) alphabet=str[i]+1;
		}
		str[num]=0;

		// Pass the tight alphabet bound rather than MAXN so the first
		// counting-sort round does not sweep a million empty buckets.
		da(str,sa,num+1,alphabet);
		calheight(str,sa,num);
		printf("%d\n",Solve(num,tim));

	}
	return 0;
}

你可能感兴趣的:(后缀数组,二分)