[最长公共子串-后缀数组]hdoj 1403:Longest Common Substring

大致题意:
    如题：给定两个字符串，求它们的最长公共子串的长度。

 

大致思路:
    后缀数组+二分答案的简单应用：把两个串用互不相同的分隔符拼接起来，求出后缀数组和height数组，再二分公共子串的长度并用height数组检验。该方法可以扩展到多串匹配中去。

 

#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;
// Capacity shared by all global work arrays below.
const int nMax = 500000;

// Integer-encoded input sequence that main() hands to da() and calHeight().
int num[nMax];

// Suffix array: filled in by da().
int sa[nMax];
// Inverse of sa (rank[sa[i]] = i): filled in by calHeight().
int rank[nMax];
// Common-prefix length of neighbouring suffixes in sa: filled in by calHeight().
int height[nMax];

// Scratch buffers used by da(): two key arrays, the sort keys of the current
// pass, and the counting-sort buckets.
int wa[nMax];
int wb[nMax];
int wv[nMax];
int wd[nMax];

// Tells whether positions a and b of r carry the same key pair, i.e. whether
// r[a] equals r[b] and r[a+l] equals r[b+l].
// Returns 1 when both halves match and 0 otherwise; the second half is only
// read when the first half already matches.
int cmp(int *r, int a, int b, int l){
    if(r[a] != r[b]) return 0;
    if(r[a+l] != r[b+l]) return 0;
    return 1;
}

// Builds the suffix array of r[0..n-1] into the global array sa using the
// prefix-doubling method: suffixes are first ordered by their first symbol and
// then repeatedly re-ordered by pairs of already-known ranks, doubling the
// compared length j each round.
//   r: integer sequence whose suffixes are sorted; its values index wd, which
//      is only cleared for 0..m-1
//   n: number of elements of r to process (main() passes the sequence
//      including its trailing 0 terminator)
//   m: size of the symbol range used by the first counting pass
// Works on the global scratch arrays wa, wb, wv and wd and overwrites sa.
void da(int *r, int n, int m){          //  doubling algorithm: r = sequence to sort, n = total length, m = symbol range
    int i, j, p, *x = wa, *y = wb, *t;
    // Counting sort by the first symbol; x keeps the current key of each position.
    for(i = 0; i < m; i ++) wd[i] = 0;
    for(i = 0; i < n; i ++) wd[x[i]=r[i]] ++;
    for(i = 1; i < m; i ++) wd[i] += wd[i-1];
    for(i = n-1; i >= 0; i --) sa[-- wd[x[i]]] = i;
    // Each round sorts by the key pair (x[i], x[i+j]). p counts the distinct
    // keys assigned at the end of a round; the loop stops once p reaches n,
    // and p becomes the bucket count m of the next round.
    for(j = 1, p = 1; p < n; j *= 2, m = p){
        // Order positions by the second key: the last j positions come first,
        // in index order ...
        for(p = 0, i = n-j; i < n; i ++) y[p ++] = i;
        // ... followed by the remaining ones in the order given by sa, shifted back by j.
        for(i = 0; i < n; i ++) if(sa[i] >= j) y[p ++] = sa[i] - j;
        // Counting sort of y by the first key (collected in wv); walking
        // backwards keeps the second-key order among equal first keys.
        for(i = 0; i < n; i ++) wv[i] = x[y[i]];
        for(i = 0; i < m; i ++) wd[i] = 0;
        for(i = 0; i < n; i ++) wd[wv[i]] ++;
        for(i = 1; i < m; i ++) wd[i] += wd[i-1];
        for(i = n-1; i >= 0; i --) sa[-- wd[wv[i]]] = y[i];
        // Swap x and y so that y holds the old keys, then write the new keys
        // into x: a position gets the same key as its predecessor in sa when
        // cmp() reports identical key pairs, otherwise the next fresh key.
        for(t = x, x = y, y = t, p = 1, x[sa[0]] = 0, i = 1; i < n; i ++){
            x[sa[i]] = cmp(y, sa[i-1], sa[i], j) ? p - 1: p ++;
        }
    }
}

void calHeight(int *r, int n){           //  求height数组。
    int i, j, k = 0;
    for(i = 1; i <= n; i ++) rank[sa[i]] = i;
    for(i = 0; i < n; height[rank[i ++]] = k){
        for(k ? k -- : 0, j = sa[rank[i]-1]; r[i+k] == r[j+k]; k ++);
    }
}

// Tag of every position in num: main() stores 1 or 2 for the two input strings
// and the separator value itself at separator positions.
int loc[nMax];

// Input buffer reused for both strings read by main().
char str[nMax];
// Output buffer for a reconstructed substring (only referenced by code that is
// currently commented out in check()).
char res[nMax];

// Per-tag "already seen" flags used by check().
bool vis[1004];

// Decides whether a common substring of length `mid` exists.
//   mid: candidate substring length
//   len: highest rank to inspect; ranks 2..len of sa/height are scanned
// Returns true as soon as two neighbouring suffixes in sa agree on at least
// `mid` leading symbols (height[i] >= mid) while carrying different loc tags,
// and false when no such pair exists.
//
// Within a run of ranks whose height is >= mid, two different tags can only
// occur if some neighbouring pair already differs, so comparing neighbours
// gives the same answer as collecting the tags of the whole run in a visited
// table, without having to clear such a table at every rank that breaks a run.
bool check(int mid,int len){
    for(int i=2;i<=len;i++){
        if(height[i]>=mid && loc[sa[i-1]]!=loc[sa[i]]){
            // A witness substring starts at num[sa[i]] and is mid symbols long.
            return true;
        }
    }
    return false;
}

int main(){
    int n,k,i,j,a,b,sp,ans,cas;
   // scanf("%d",&cas);
    while(scanf("%s",str)!=EOF){
        sp=31;    //分隔符
        n=0;
        ans=0;
        for(j=0;str[j];j++){
            loc[n]=1;
            num[n++]=str[j]-'a'+1;
        }
        loc[n]=sp;
        num[n++]=sp++;
        scanf("%s",str);
        for(j=0;str[j];j++){
            loc[n]=2;
            num[n++]=str[j]-'a'+1;
        }
        loc[n]=sp;
        num[n++]=sp++;
        num[n]=0;
        da(num,n+1,sp+1);
        calHeight(num,n);
        int left=0,right=strlen(str),mid;//开始二分
        while(right>=left){
            mid=(right+left)/2;
            if(check(mid,n)){         //判断长度为mid的串是否是所有字符串的公共子串
                left=mid+1;
                ans=mid;
            }
            else{
                right=mid-1;
            }
        }
        cout<<ans<<endl;
    }
    return 0;
}
 

你可能感兴趣的:(数据结构,ACM,后缀数组,hdoj 1403,最长公共子串)