poj 3261 Milk Patterns 最长的出现最少k次的重复(可重叠)子串 后缀数组

Milk Patterns
Time Limit: 5000MS   Memory Limit: 65536K
Total Submissions: 4551   Accepted: 1942
Case Time Limit: 2000MS

Description

Farmer John has noticed that the quality of milk given by his cows varies from day to day. On further investigation, he discovered that although he can't predict the quality of milk from one day to the next, there are some regular patterns in the daily milk quality.

To perform a rigorous study, he has invented a complex classification scheme by which each milk sample is recorded as an integer between 0 and 1,000,000 inclusive, and has recorded data from a single cow over N (1 ≤ N ≤ 20,000) days. He wishes to find the longest pattern of samples which repeats identically at least K (2 ≤ K ≤ N) times. This may include overlapping patterns -- 1 2 3 2 3 2 3 1 repeats 2 3 2 3 twice, for example.

Help Farmer John by finding the longest repeating subsequence in the sequence of samples. It is guaranteed that at least one subsequence is repeated at least K times.

Input

Line 1: Two space-separated integers: N and K
Lines 2..N+1: N integers, one per line; the quality of the milk on day i appears on the i-th of these lines.

Output

Line 1: One integer, the length of the longest pattern which occurs at least K times

Sample Input

8 2
1
2
3
2
3
2
3
1

Sample Output

4

 

 

 

/////

 

#include<iostream>
#include<cstdio>
#include<cstring>
#include<algorithm>
using namespace std;
/// Suffix array, prefix-doubling algorithm: shared storage.
const int maxn = 1010000;  // capacity of every array below
char str[maxn];            // character input buffer (only referenced by the usage note in main)
int wa[maxn];              // DA scratch: one of the two rank buffers
int wb[maxn];              // DA scratch: the other rank buffer
int wv[maxn];              // DA scratch: sort keys in second-key order
int wn[maxn];              // DA scratch: counting-sort buckets
int a[maxn];               // integer sequence handed to DA / calheight
int sa[maxn];              // resulting suffix array
/// Returns 1 when positions a and b of r carry the same key pair
/// (r[x], r[x+l]), and 0 otherwise. r[a+l] / r[b+l] are only read
/// when the first keys already match.
int cmp(int* r,int a,int b,int l)
{
    if (r[a] != r[b])
        return 0;
    return r[a + l] == r[b + l] ? 1 : 0;
}
/**n为字符串长度,m为字符的取值范围,r为字符串。后面的j为每次排
序时子串的长度*/
void DA(int* r,int* sa,int n,int m)
{
    int i,j,p,*x=wa,*y=wb,*t;
    ///对R中长度为1的子串进行基数排序
    for(i=0;i<m;i++)wn[i]=0;
    for(i=0;i<n;i++)wn[x[i]=r[i]]++;
    for(i=1;i<m;i++)wn[i]+=wn[i-1];
    for(i=n-1;i>=0;i--)sa[--wn[x[i]]]=i;
    for(j=1,p=1;p<n;j*=2,m=p)
    {
        /**利用了上一次基数排序的结果,对待排序的子串的第二关键字进行
        了一次高效地基数排序*/
        for(p=0,i=n-j;i<n;i++)y[p++]=i;
        for(i=0;i<n;i++)if(sa[i]>=j)y[p++]=sa[i]-j;
        ///基数排序
        for(i=0;i<n;i++)wv[i]=x[y[i]];
        for(i=0;i<m;i++)wn[i]=0;
        for(i=0;i<n;i++)wn[wv[i]]++;
        for(i=1;i<m;i++)wn[i]+=wn[i-1];
        for(i=n-1;i>=0;i--)sa[--wn[wv[i]]]=y[i];
        ///当p=n的时候,说明所有串都已经排好序了
        ///在第一次排序以后,rank数组中的最大值小于p,所以让m=p
        for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
    }
    return;
}
///后缀数组  计算height数组
/**
height数组的值应该是从height[1]开始的,而且height[1]应该是等于0的。
原因是,+因为我们在字符串后面添加了一个0号字符,所以它必然是最小的
一个后缀。而字符串中的其他字符都应该是大于0的(前面有提到,使用倍
增算法前需要确保这点),所以排名第二的字符串和0号字符的公共前缀
(即height[1])应当为0.在调用calheight函数时,要注意height数组的范
围应该是[1..n]。所以调用时应该是calheight(r,sa,n)
而不是calheight(r,sa,n+1)。*/
int rank[maxn],height[maxn];
void calheight(int* r,int* sa,int n)
{
    int i,j,k=0;
    for(i=1;i<=n;i++)rank[sa[i]]=i;
    for(i=0;i<n;height[rank[i++]]=k)
    for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);
    return;
}
int main()
{
        //后缀数组 倍增算法 使用方法
        /**
        在使用倍增算法前,需要保证r数组的值均大于0。然后要在原字
        符串后添加一个0号字符,具体原因参见罗穗骞的论文。这时候,
        若原串的长度为n,则实际要进行后缀数组构建的r数组的长度应
        该为n+1.所以调用DA函数时,对应的n应为n+1.*/
       /* int n=strlen(str);//str 待处理字符串
        for(int i=0;i<n;i++) a[i]=(int)str[i];
        a[n]=0;
        DA(a,sa,n+1,256);
        calheight(a,sa,n);*/
        //....................................
    int n,k;
    while(scanf("%d%d",&n,&k)==2)
    {
        for(int i=0;i<n;i++)
        {
            scanf("%d",&a[i]);a[i]++;//保证a[i]>0
        }
        a[n]=0;
        DA(a,sa,n+1,maxn);//这道题目的字符范围是maxn
        calheight(a,sa,n);

        /** 最长的出现最少k次的重复(可重叠)子串

            二分答案
        */
        int l=0,r=n,mid,ans=0;
        while(l<r)
        {
            mid=(l+r)>>1;
            int cnt=0,flag=0;
            for(int i=1;i<=n;i++)
            {
                if(height[i]>=mid) cnt++;
                if(height[i]<mid||i==n)//不能漏掉最后一个字符
                {
                    if(cnt+1>=k)
                    {
                        ans=max(ans,mid),flag=1;
                        break;
                    }
                    cnt=0;
                }
            }
            if(flag) l=mid+1;
            else r=mid;
        }
        printf("%d/n",ans);
    }
    return 0;
}

 

 

你可能感兴趣的:(poj 3261 Milk Patterns 最长的出现最少k次的重复(可重叠)子串 后缀数组)