后缀数组的运用之求不重叠最长重复子串

  什么是不重叠最长重复子串呢?就是在一个串中至少出现两次、且这些出现位置互不重叠的子串里最长的那一个,说起来比较绕口。

  解决这个问题的关键还是利用height 数组。把排序后的后缀分成若干组,其中每组的后缀之间的height 值都不小于k。然后找出各个组的后缀的sa值的最大最小值max,min,如果存在 max-min >= k,那么就存在长度为k的不重叠重复子串:因为根据LCP定理,每个组中的height值都不小于k,就是说这个组中这些后缀的最长公共前缀至少是k,再由于max-min >= k,两次出现不会重叠,所以可以判定存在所要求的子串。做题的时候二分答案,把问题变为判定性问题(大牛说的)。那么整个时间复杂度为O(nlogn)。把height数组分组的思想非常常用,可以多看看IOI论文了解了解。注意:下面的代码针对 pku 1743,先对相邻音符取差分再求解,差分串中长度为k的重复对应原串中k+1个音符,所以判定条件变为 max-min > k,最后输出的答案是 k+1。

  pku 1743,题本身不难,可是,万恶的英语啊.....

#include<stdbool.h>

#include<stdio.h>

#include<string.h>

/* Element capacity of the work arrays below and of the local rank table in get_height. */
#define MAX 20010

/*
 * Scratch storage for get_sa:
 *   wx, wy - the two buffers get_sa swaps every round; one holds the current
 *            ranks, the other the suffix order by second key
 *   bar    - counting-sort bucket counters
 *   r      - sort keys (rank of each entry in the second-key order)
 * Being file-scope objects they are shared by every call to get_sa.
 */
int wx[MAX],wy[MAX],bar[MAX],r[MAX];

/*
 * Returns 1 when positions a and b of s carry the same value AND the
 * positions len further on (a+len, b+len) also carry the same value;
 * returns 0 otherwise.  The second pair is only read when the first matches.
 */
int cmp(int *s,int a,int b,int len)
{
	if (s[a] != s[b])
		return 0;
	return s[a + len] == s[b + len];
}

/*
 * Builds the suffix array of note[0..len-1] by prefix doubling.
 *
 * note - input values; the first counting pass uses buckets 0..200, so every
 *        value must lie in that range
 * sa   - output; on return sa[i] is the start index of the i-th suffix in
 *        sorted order
 * len  - number of elements in note (and slots written in sa)
 *
 * Uses the file-scope buffers wx, wy, r and bar as scratch space.
 * NOTE(review): the rank comparison reads index + span without a bounds
 * check; the caller in this file appends a trailing 0 to note, which looks
 * like the sentinel that keeps those reads in range - confirm before reusing
 * with other inputs.
 */
void get_sa(int *note,int *sa,int len)
{
	int *rank_cur = wx;    /* rank of each suffix for the current prefix length */
	int *by_second = wy;   /* suffix starts ordered by their second key */
	int *keys = r;         /* first-key rank of each entry of by_second */
	int buckets = 200;     /* largest bucket index used by the counting sort */
	int i;

	/* Round 0: counting sort of the suffixes by their first value. */
	for (i = 0; i <= buckets; i++)
		bar[i] = 0;
	for (i = 0; i < len; i++) {
		rank_cur[i] = note[i];
		bar[rank_cur[i]]++;
	}
	for (i = 1; i <= buckets; i++)
		bar[i] += bar[i - 1];
	for (i = len - 1; i >= 0; i--) {
		bar[rank_cur[i]]--;
		sa[bar[rank_cur[i]]] = i;
	}

	int span = 1;      /* prefix length already sorted */
	int classes = 1;   /* number of distinct ranks after the latest round */
	while (classes < len) {
		int filled = 0;

		/* Suffixes shorter than span have an empty second key: they go first. */
		for (i = len - span; i < len; i++)
			by_second[filled++] = i;
		/* The rest follow in the order of the suffix starting span later. */
		for (i = 0; i < len; i++) {
			if (sa[i] >= span)
				by_second[filled++] = sa[i] - span;
		}

		/* Stable counting sort on the first key, scanning the second-key order. */
		for (i = 0; i < len; i++)
			keys[i] = rank_cur[by_second[i]];
		for (i = 0; i <= buckets; i++)
			bar[i] = 0;
		for (i = 0; i < len; i++)
			bar[keys[i]]++;
		for (i = 1; i <= buckets; i++)
			bar[i] += bar[i - 1];
		for (i = len - 1; i >= 0; i--) {
			bar[keys[i]]--;
			sa[bar[keys[i]]] = by_second[i];
		}

		/* Swap buffers: by_second now holds the previous ranks, rank_cur gets the new ones. */
		int *swap = by_second;
		by_second = rank_cur;
		rank_cur = swap;

		rank_cur[sa[0]] = 0;
		classes = 1;
		for (i = 1; i < len; i++) {
			if (cmp(by_second, sa[i], sa[i - 1], span))
				rank_cur[sa[i]] = classes - 1;   /* same rank as the predecessor */
			else
				rank_cur[sa[i]] = classes++;     /* opens a new rank */
		}

		span *= 2;
		buckets = classes;
	}
}

/*
 * Fills height[] from a suffix array: for every start position p in
 * 0..len-2, height[rank(p)] receives the length of the common prefix of the
 * suffix at p and the suffix ranked immediately before it.
 *
 * note   - the sequence the suffix array was built from
 * sa     - suffix array of note
 * height - output; entries for ranks 1..len-1 are written, height[0] is not
 * len    - number of elements in note / sa
 *
 * Ranks are only recorded for sa[1..len-1], so the code relies on position
 * len-1 being the suffix at sa[0]; the caller in this file stores a trailing
 * 0 there (presumably as the unique smallest value - confirm).  The matching
 * loop has no bounds check of its own.
 */
void get_height(int *note,int *sa,int *height,int len)
{
	int pos[MAX];   /* pos[p] = rank of the suffix starting at p */
	int lcp = 0;

	for (int idx = 1; idx < len; idx++)
		pos[sa[idx]] = idx;

	for (int start = 0; start < len - 1; start++) {
		/* Carry over the previous match length minus one instead of restarting at 0. */
		if (lcp)
			lcp--;

		int prev = sa[pos[start] - 1];
		while (note[start + lcp] == note[prev + lcp])
			lcp++;

		height[pos[start]] = lcp;
	}
}

/*
 * Decides whether some run of consecutive ranks i with height[i] >= mid
 * contains two suffix start positions that are MORE than mid apart.
 *
 * sa     - suffix array (indices 1..n-1 are examined)
 * height - height array aligned with sa (indices 2..n-1 are examined)
 * n      - number of entries in sa / height
 * mid    - candidate length being tested
 *
 * Returns true as soon as such a pair is found, false otherwise.  The
 * comparison is strict (max - min > mid), not >=.
 */
bool check(int *sa,int *height,int n,int mid)
{
	/* With fewer than two entries sa[1] does not exist and no pair can be formed. */
	if (n < 2)
		return false;

	int lo = sa[1];
	int hi = sa[1];

	for (int i = 2; i < n; i++) {
		if (height[i] < mid) {
			/* Common prefix too short: a new group starts at this rank. */
			lo = hi = sa[i];
			continue;
		}

		if (sa[i] < lo)
			lo = sa[i];
		if (sa[i] > hi)
			hi = sa[i];
		if (hi - lo > mid)
			return true;
	}

	return false;
}

/*
 * Reads test cases from stdin until a count of 0, end of input, or malformed
 * input.  Each case is a count n followed by n integers.  Consecutive values
 * are turned into differences (offset by 88), a 0 terminator is appended,
 * and the suffix array / height array of that sequence are built.  A binary
 * search over check() then finds the largest length e it accepts; the
 * program prints e+1 when e >= 4 and 0 otherwise.
 */
int main()
{
	int sa[20010],height[20010],note[20010];
	const int capacity = (int)(sizeof note / sizeof note[0]);
	int n;

	/* Compare against 1: scanf returns EOF (non-zero) at end of input, which
	   would otherwise keep the loop running forever on the stale n. */
	while (scanf("%d", &n) == 1 && n != 0)
	{
		int prev, cur;

		/* A negative or oversized count would index outside the arrays. */
		if (n < 0 || n > capacity)
			break;

		if (scanf("%d", &prev) != 1)
			break;

		for (int i = 0; i < n - 1; i++) {
			if (scanf("%d", &cur) != 1)
				return 0;

			int encoded = cur - prev + 88;
			/* get_sa's first counting pass only has buckets 0..200, and 0 is
			   reserved for the terminator stored below. */
			if (encoded < 1 || encoded > 200)
				return 0;

			note[i] = encoded;
			prev = cur;
		}

		/* Terminator: smaller than every encoded difference. */
		note[n-1] = 0;

		get_sa(note,sa,n);
		get_height(note,sa,height,n);

		/* Binary search for the largest length accepted by check(). */
		int s = 1, e = n / 2;
		while (s <= e)
		{
			int mid = (s + e) >> 1;

			if (check(sa, height, n, mid))
				s = mid + 1;
			else
				e = mid - 1;
		}

		/* e repeated differences span e+1 original values. */
		if (e >= 4)
			printf("%d\n", e+1);
		else
			printf("0\n");
	}

	return 0;
}

你可能感兴趣的:(后缀数组)