poj1743(后缀数组+二分)

Musical Theme
Time Limit: 1000MS   Memory Limit: 30000K
Total Submissions: 15387   Accepted: 5321

Description

A musical melody is represented as a sequence of N (1<=N<=20000)notes that are integers in the range 1..88, each representing a key on the piano. It is unfortunate but true that this representation of melodies ignores the notion of musical timing; but, this programming task is about notes and not timings.
Many composers structure their music around a repeating "theme", which, being a subsequence of an entire melody, is a sequence of integers in our representation. A subsequence of a melody is a theme if it:
  • is at least five notes long
  • appears (potentially transposed -- see below) again somewhere else in the piece of music
  • is disjoint from (i.e., non-overlapping with) at least one of its other appearance(s)

Transposed means that a constant positive or negative value is added to every note value in the theme subsequence.
Given a melody, compute the length (number of notes) of the longest theme.
One second time limit for this problem's solutions!

Input

The input contains several test cases. The first line of each test case contains the integer N. The following N integers represent the sequence of notes.
The last test case is followed by one zero.

Output

For each test case, the output file should contain a single line with a single integer that represents the length of the longest theme. If there are no themes, output 0.

Sample Input

30
25 27 30 34 39 45 52 60 69 79 69 60 52 45 39 34 30 26 22 18
82 78 74 70 66 67 64 60 65 80
0

Sample Output

5

Hint

Use scanf instead of cin to reduce the read time.

Source

LouTiancheng@POJ
 
本题给定一个1-88的序列,可以对其中的某段进行加减任意数,求不可重叠最长重复子串。
         求不重叠的最长重复子串,是后缀数组的典型应用。可以对其中的某段进行加减任意数,这个条件应该进行转化,若某段加减某个数后可以找到符合条件的子串,则说明这两段的前后数的差值不变,从而可以将长度为n的数串转化为长度为n-1的对应相邻位置的差值串,注意最终答案+1。剩下任务就是求不可重叠最长重复子串。
         结合高人论文《后缀数组——处理字符串的有力工具》,可以想到用二分的思想。我们首先二分枚举一个长度midlen,然后看是否存在长度为midlen且不重叠的重复子串。将height[]数组依据值与midlen的关系分段;这样一来最长公共前缀大于等于midlen的后缀必定在同一段,然后判断同一段中是否有两个后缀不重叠,即起始位置差大于等于midlen;若存在,minlen=midlen+1,跳出开始新一次判断;若不存在,maxlen=midlen-1,跳出开始新一次判断……直至区间长度为负。
后缀数组+二分,时间复杂度为O(N*log(N))
#include<iostream>
#include<cstring>
#include<cstdio>
using namespace std;

//*****************************************************************
// Capacity of every working array: the largest melody (20000 notes) plus slack.
const int MAXN = 20000 + 100;

// Sequence handed to the suffix-array builder.
int str[MAXN];
// Suffix array computed from str.
int sa[MAXN];

// Scratch buffers used by da() while sorting.
int wa[MAXN];
int wb[MAXN];
int wv[MAXN];
int wh[MAXN];
// Key comparison used by da() when it hands out new ranks.
// Returns 1 when r[a]==r[b] and r[a+l]==r[b+l], otherwise 0.
// The second pair is only read when the first pair matches.
int cmp(int *r,int a,int b,int l)
{
	if(r[a]!=r[b])
		return 0;
	if(r[a+l]!=r[b+l])
		return 0;
	return 1;
}
//Builds the suffix array sa[] of r[0..n-1] by prefix doubling.
//The n suffixes of r are sorted in ascending order and their start positions
//are written to sa in that order, so Suffix(sa[i])<Suffix(sa[i+1]).
//Per the original author's notes: n = (length of the real string)+1 because a
//smallest terminator is appended at the end; sa[0] then holds that terminator
//suffix and sa[1..n-1] are the real suffixes.
//  r  : input sequence; its values index wh[], so each must lie in [0,m).
//  sa : output, n entries.
//  n  : number of elements of r (terminator included).
//  m  : exclusive upper bound on the values stored in r.
//NOTE(review): cmp() reads index a+l; this appears to rely on r[n-1] being a
//unique minimum so that those reads stay inside the first n entries -- confirm.
//Stated by the original author: O(n log n) time, O(n) space.
void da(int *r,int *sa,int n,int m)
{
	int i,j,p,*x=wa,*y=wb,*t;
	//Counting sort of all positions by their first element.
	//x[] receives a copy of r and serves as the rank key of each position.
	for(i=0;i<m;i++) wh[i]=0;
	for(i=0;i<n;i++) wh[x[i]=r[i]]++;
	for(i=1;i<m;i++) wh[i]+=wh[i-1];
	for(i=n-1;i>=0;i--) sa[--wh[x[i]]]=i;
	//Each round doubles j. p ends a round as the number of distinct ranks;
	//the loop stops once p reaches n, and m=p bounds the next counting sort.
	for(j=1,p=1;p<n;j*=2,m=p)
	{
		//y[] lists positions ordered by second key: positions i>=n-j have
		//no second half and are placed first...
		for(p=0,i=n-j;i<n;i++) y[p++]=i;
		//...followed by the rest, taken in current sa order shifted back by j.
		for(i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
		//wv[i] is the first key of the i-th position in y order.
		for(i=0;i<n;i++) wv[i]=x[y[i]];
		//Counting sort by first key; filling from the back keeps the y order
		//among equal first keys.
		for(i=0;i<m;i++) wh[i]=0;
		for(i=0;i<n;i++) wh[wv[i]]++;
		for(i=1;i<m;i++) wh[i]+=wh[i-1];
		for(i=n-1;i>=0;i--) sa[--wh[wv[i]]]=y[i];
		//Swap x and y so y holds the old ranks, then write new ranks into x:
		//neighbours in sa whose old key pairs match (cmp) share a rank,
		//otherwise the next rank p is used.
		for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++)
			x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
	}
	return;
}

int rank[MAXN],height[MAXN];
//定义height[i]=suffix(sa[i-1])和suffix(sa[i])的最长公
//共前缀,也就是排名相邻的两个后缀的最长公共前缀
//任意两个起始位置为i,j(假设rank[i]<rank[j])的后缀的最长公共前缀
//为height[rank[i]+1]、height[rank[i]+2]…height[rank[j]]的最小值
void calheight(int *r,int *sa,int n)
{
	int i,j,k=0;
	for(i=1;i<=n;i++) rank[sa[i]]=i;
	for(i=0;i<n;height[rank[i++]]=k)
		for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);
		return;
}
//*****************************************************************

// Smaller of two ints.
int min(int a,int b)
{
	if(a<b)
		return a;
	return b;
}
// Larger of two ints.
int max(int a,int b)
{
	if(a>b)
		return a;
	return b;
}

//Length (in notes) of the longest theme, or 0 if there is none.
//Binary-searches the answer on the difference sequence: for a candidate
//length midlen, decide whether two equal windows of midlen differences exist
//whose note ranges do not overlap.
//  n : length of the difference sequence; reads the globals sa[1..n] and
//      height[1..n].
//Returns (best window length)+1, because k differences span k+1 notes, or 0
//when the best window is shorter than 4 differences (fewer than 5 notes).
//Each feasibility check is one pass over height[], and there are O(log n)
//checks.
int Bin_Solve(int n)
{
	int i,Minlen,Maxlen,midlen,lowid,highid;
	bool flag;
	Minlen=0,Maxlen=n/2;
	while(Minlen<=Maxlen)//binary search on the window length
	{
		midlen=(Minlen+Maxlen)/2;
		lowid=n+1,highid=0;
		flag=false;
		for(i=1;i<=n&&!flag;i++)//i is a rank
		{
			if(height[i]<midlen)
			{
				//The common prefix with the previous suffix is too short,
				//so a new group starts at this suffix.
				lowid=highid=sa[i];
			}
			else
			{
				//Same group: track the smallest and largest start positions.
				lowid=min(lowid,sa[i]);
				highid=max(highid,sa[i]);
				//A window of midlen differences starting at p covers notes
				//p..p+midlen, so two windows are disjoint only if their
				//starts differ by MORE than midlen. Using >= here would let
				//the two themes share one note.
				if(highid-lowid>midlen)
					flag=true;
			}
		}
		if(flag)Minlen=midlen+1;
		else Maxlen=midlen-1;
	}
	return Maxlen<4?0:Maxlen+1;
}


int main()
{
	int n,i;
	//freopen("in.txt","r",stdin);
	while(~scanf("%d",&n),n)
	{
		for(i=0;i<n;i++)
		{
			scanf("%d",&str[i]);
		}
		if(n<10)
		{
			printf("0\n");
			continue;
		}
		for(i=0;i<n-1;i++)
		{
			str[i]=str[i]-str[i+1]+100;
		}
		str[n-1] = 0; 
		da(str,sa,n,200);
		calheight(str,sa,n-1);
		printf("%d\n",Bin_Solve(n-1));
	}
	return 0;
}

你可能感兴趣的:(后缀数组,二分)