POJ 1743 男人八题之后缀数组求不可重叠最长重复子串

Musical Theme
Time Limit: 1000MS   Memory Limit: 30000K
Total Submissions: 14874   Accepted: 5118

Description

A musical melody is represented as a sequence of N (1<=N<=20000)notes that are integers in the range 1..88, each representing a key on the piano. It is unfortunate but true that this representation of melodies ignores the notion of musical timing; but, this programming task is about notes and not timings.
Many composers structure their music around a repeating "theme", which, being a subsequence of an entire melody, is a sequence of integers in our representation. A subsequence of a melody is a theme if it:
  • is at least five notes long
  • appears (potentially transposed -- see below) again somewhere else in the piece of music
  • is disjoint from (i.e., non-overlapping with) at least one of its other appearance(s)

Transposed means that a constant positive or negative value is added to every note value in the theme subsequence.
Given a melody, compute the length (number of notes) of the longest theme.
One second time limit for this problem's solutions!

Input

The input contains several test cases. The first line of each test case contains the integer N. The following N integers represent the sequence of notes.
The last test case is followed by one zero.

Output

For each test case, the output file should contain a single line with a single integer that represents the length of the longest theme. If there are no themes, output 0.

Sample Input

30

25 27 30 34 39 45 52 60 69 79 69 60 52 45 39 34 30 26 22 18

82 78 74 70 66 67 64 60 65 80

0

Sample Output

5

Hint

Use scanf instead of cin to reduce the read time.

Source

 

给定一个正整数N(N<=20000),然后是N个整数xi(1<=xi<=88, 1<=i<=N)组成一个有序的整数序列;问这个序列中存在的最长一个符合条件的子序列长度是多少,符合的条件是

1、 子序列A长度至少为5

2、 有另外一个子序列B,且AB二者没有相交部分  不重叠

3、 A、B 的长度一样,并且 A、B 相同,或者 A 的所有元素加上或减去同一个数之后能变成 B;

 

思路:

如果两个子串满足上面的条件,那么它们相邻元素的差值 a[i]-a[i-1] 所组成的序列也应该完全相同。注意:n 个音符只对应 n-1 个差值,所以差值序列上长度为 k 的重复子串对应原序列中长度为 k+1 的主题(因此代码中最后的结果要 +1)。

之后就是用后缀数组求不可重叠最长重复子串了  

首先用二分答案的方法把问题变成判定性问题:是否存在长度不小于 k 的不重叠重复子串?然后将 height 数组分组,使每组内相邻后缀之间的 height 都不小于 k;如果某一组内存在两个后缀,它们的 sa[] 值之差大于 k,就说明存在长度至少为 k 的不重叠重复子串。这里必须是严格大于:在差值序列上紧挨着的两段,在原音符序列中会共用一个音符。

 


 

#include<stdio.h>

#include<string.h>

#include<iostream>

#include<cstdio>

#include<cmath>

#include<vector>

#include<cstring>

using namespace std;



// Capacity of every work array in this file. The task guarantees N <= 20000
// and the suffix-array code indexes at most a few entries past N, so 65536
// leaves generous headroom. The previous value (1000012) made best[20][nMax]
// alone about 80 MB, far above the task's 30000K memory limit.
const int nMax = 65536;



// Sequence handed to da()/calHeight(); main() fills it with adjacent note
// differences shifted by +100 and terminates it with a 0.
int num[nMax];

// sa[i]: start index of the i-th smallest suffix (written by da()).
int sa[nMax];
// rank[p]: position of the suffix starting at p inside sa (written by calHeight()).
int rank[nMax];
// height[i]: common-prefix length of the suffixes sa[i-1] and sa[i] (written by calHeight()).
int height[nMax];

// Scratch buffers used by da(): two rank arrays, the current sort keys,
// and the counting-sort buckets.
int wa[nMax];
int wb[nMax];
int wv[nMax];
int wd[nMax];

// Returns the smaller of the two arguments (a when they are equal).
int mmin(int a,int b)
{
    return (b < a) ? b : a;
}

// Returns 1 when positions a and b of r agree on both keys, i.e.
// r[a] == r[b] and r[a+l] == r[b+l]; returns 0 otherwise.
// The second pair is only read when the first pair matches.
int cmp(int *r, int a, int b, int l)
{
    if (r[a] != r[b]) return 0;
    return (r[a + l] == r[b + l]) ? 1 : 0;
}



// Builds the suffix array of r[0..n-1] into the global sa[] by prefix doubling.
//   r : sequence to sort; every value is used as a bucket index, so it must lie in [0, m)
//   n : number of elements of r to process
//   m : size of the value range (number of counting-sort buckets for the first pass)
// Uses the global scratch arrays wa, wb, wv and wd.
// NOTE(review): cmp() reads y[sa[i] + j]; this looks safe only when the last
// element of r is a unique minimum, otherwise indices past n-1 may be read -- confirm
// that callers guarantee this.
void da(int *r, int n, int m){          //  prefix-doubling algorithm: r = sequence to sort, n = total length, m = value range

    int i, j, p, *x = wa, *y = wb, *t;

    // Counting sort of the single elements: x[i] keeps r[i] as the initial key.
    for(i = 0; i < m; i ++) wd[i] = 0;

    for(i = 0; i < n; i ++) wd[x[i]=r[i]] ++;

    for(i = 1; i < m; i ++) wd[i] += wd[i-1];

    for(i = n-1; i >= 0; i --) sa[-- wd[x[i]]] = i;

    // Each round doubles the compared length j; p counts the distinct keys found
    // in the round, the loop stops once p reaches n, and m becomes p for the next round.
    for(j = 1, p = 1; p < n; j *= 2, m = p){

        // y lists the start positions ordered by their second key: positions
        // n-j..n-1 come first, followed by sa[i]-j in the current sa order.
        for(p = 0, i = n-j; i < n; i ++) y[p ++] = i;

        for(i = 0; i < n; i ++) if(sa[i] >= j) y[p ++] = sa[i] - j;

        // wv[i] is the first key of y[i]; a counting sort on it rebuilds sa.
        for(i = 0; i < n; i ++) wv[i] = x[y[i]];

        for(i = 0; i < m; i ++) wd[i] = 0;

        for(i = 0; i < n; i ++) wd[wv[i]] ++;

        for(i = 1; i < m; i ++) wd[i] += wd[i-1];

        for(i = n-1; i >= 0; i --) sa[-- wd[wv[i]]] = y[i];

        // Swap x and y, then assign new keys into x: neighbours in sa that cmp()
        // reports as equal share a key, otherwise the next key value p is used.
        for(t = x, x = y, y = t, p = 1, x[sa[0]] = 0, i = 1; i < n; i ++){

            x[sa[i]] = cmp(y, sa[i-1], sa[i], j) ? p - 1: p ++;

        }

    }

}



void calHeight(int *r, int n){           //  求height数组。

    int i, j, k = 0;

    for(i = 1; i <= n; i ++) rank[sa[i]] = i; // 1->n

    for(i = 0; i < n; i++){

        for(k ? k -- : 0, j = sa[rank[i]-1]; r[i+k] == r[j+k]; k ++);

        height[rank[i]] = k;

    }

}



// Filled by get_log(): Log[0] is -1 and Log[i] is floor(log2(i)) for i >= 1.
int Log[nMax];

// Sparse table filled by initRMQ() from height[].
int best[20][nMax];// best[i][j]: minimum of the 2^i consecutive elements starting at index j

// Builds the sparse table best[][] over height[1..n] for range-minimum queries.
// Row 0 copies height[]; row `level` stores, for every start index, the
// minimum of the 2^level entries beginning there, obtained from the two
// half-length ranges of the previous row. Relies on Log[] from get_log().
void initRMQ(int n)
{
    for (int pos = 1; pos <= n; ++pos)
        best[0][pos] = height[pos];

    const int topLevel = Log[n];
    for (int level = 1; level <= topLevel; ++level)
    {
        const int half = 1 << (level - 1);
        const int lastStart = n - 2 * half + 1;
        for (int pos = 1; pos <= lastStart; ++pos)
        {
            best[level][pos] = mmin(best[level - 1][pos], best[level - 1][pos + half]);
        }
    }
}

int lcp(int a,int b) {//询问a,b后缀的最长公共前缀

	a = rank[a];    b = rank[b];

	if(a > b) swap(a,b);

	a ++;

	int t = Log[b - a + 1];

	return mmin(best[t][a] , best[t][b - (1<<t) + 1]);

}



// Fills Log[] so that Log[0] == -1 and Log[i] == floor(log2(i)) for 1 <= i < nMax.
// i & (i-1) is zero exactly when i is a power of two, which is where the
// logarithm grows by one.
// The loop stops at nMax - 1: Log has nMax elements, so the former bound
// (i <= nMax) wrote one element past the end of the array.
void get_log()
{
    Log[0] = -1;
    for(int i = 1; i < nMax; i ++)
    {
        Log[i] = (i & (i - 1)) ? Log[i - 1] : Log[i - 1] + 1;
    }
}

// Not referenced by any code visible in this file.
char str[nMax];

// Not referenced by any code visible in this file.
int ans[nMax];

// Number of notes in the current test case; read in main() and used by solve().
int n;

// Note values of the current test case, a[0..n-1], read in main().
int a[nMax];

int solve(int x)

{

///注意通过判断sa[i]-sa[i-1]>=k决定不重复长度是否大于k是不行的 因为有可能有好几个重复的排在一起

///对于每个都不大于k 但是最后一个和第一个的距离是大于k的

    ///注意sa[i],sa[i-1]不一定哪个大那个小

    int i,mx,mn;

    mx=0,mn=nMax;

    for(i=1;i<=n;i++)

    {

        if(height[i]>=x)

        {

              mx=max(mx,sa[i]);

              mn=min(mn,sa[i]);

              if(mx-mn>=x) return 1;

        }

        else

            {

                mx=mn=sa[i];

            }

    }



    return 0;

}



int main()

{

	int i,j;

    get_log();

	while(scanf("%d",&n)!=EOF)

	{

	    if(!n) break;

	    for(i=0;i<n;i++) scanf("%d",&a[i]);

	   // n--;

		for(i=1;i<n;i++)

		{

			num[i]=a[i]-a[i-1]+100;//加100防止出现负数

		}

		num[n]=0;

		da(num,n+1,300);//这里要开大一点  300

		calHeight(num,n);

		initRMQ(n);

	  /*

        for(i=0; i<n+1; i++)  // rank[i] : suffix(i)排第几

           printf("rank[%d] =  %d\n",i,rank[i]);

        printf("\n");

        for(i=0; i<n+1; i++)   // sa[i] : 排在第i个的是谁

           printf("sa[%d] =  %d\n",i,sa[i]);

       */

       int left,right,mx=0,mid;

       left=4;right=n/2+1;

       while(left<=right)

       {

            mid=(left+right)/2;

            if(solve(mid)&&mid>mx)

            {

                mx=mid;

                left=mid+1;

            }

            else

                {

                    right=mid-1;

                }

       }

       if(mx==0) {printf("0\n");continue;}

       printf("%d\n",mx+1);

    }

    return 0;

}



 

 


 



 

 

你可能感兴趣的:(后缀数组)