后缀数组的应用

height[i] 表示排名第i的后缀和排名第i-1的后缀的最长公共前缀,也即sa[i]与sa[i-1]的最长公共前缀。

1、给定一个字符串,询问任意两个后缀的最长公共前缀,假设询问suffix(i)和suffix(j)的最长公共前缀,且rank[i] < rank[j],那么suffix(i)和suffix(j)的最长公共前缀的

长度为height[rank[i]+1],height[rank[i]+2]....height[rank[j]]的最小值,即一次RMQ(rank[i]+1,rank[j]);

RMQ的初始化为O(nlogn),询问为O(1)

 

2、给定一个字符串,问最长的重复子串的长度是多少(子串可重叠),重复字串的长度可转化为某两个后缀的最长公共前缀,任意两个后缀的最长公共前缀

相当于height数组中某一段的最小值,那么这个值一定不大于height数组中的最大值,所以最长重复子串的长度就是height数组的最大值

时间复杂度为O(n)

 

3、给定一个字符串,问最长的重复子串的长度是多少(子串不可重叠)。重复子串的长度肯定在[0,n]之间,所以我们可以二分枚举可能的答案,然后判定是否可行。

设答案为k,那么将连续的height[i]>=k的分为一组,这样组内,任意两个后缀重复长度才会>=k,然后判断组内最大的sa[i]与最小的sa[i]的差值是否>=k

时间复杂度为O(nlogn)

poj1743

  1 #include <stdio.h>

  2 #include <string.h>

  3 const int N = 20000 + 10;

  4 int rank[N],wb[N],bucket[N],height[N],count[N];

  5 int max(const int &a, const int &b)

  6 {

  7     return a > b ? a : b;

  8 }

  9 int min(const int &a, const int &b)

 10 {

 11     return a < b ? a : b;

 12 }

 13 void build_sa(int *r, int *sa, int n, int m)

 14 {

 15     memset(wb,0,sizeof(0));

 16     memset(rank,0,sizeof(rank));

 17     int *x=rank,*y=wb,*t,i,k,p;

 18     for(i=0; i<m; ++i) count[i] = 0;

 19     for(i=0; i<n; ++i) count[x[i] = r[i]]++;

 20     for(i=1; i<m; ++i) count[i] += count[i-1];

 21     for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i;

 22     for(k=1; k<=n; k<<=1)

 23     {

 24         for(p=0,i=n-k; i<n; ++i) y[p++] = i;

 25         for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k;

 26         for(i=0; i<n; ++i) bucket[i] = x[y[i]];

 27         for(i=0; i<m; ++i) count[i] = 0;

 28         for(i=0; i<n; ++i) count[bucket[i]]++;

 29         for(i=1; i<m; ++i) count[i] += count[i-1];

 30         for(i=n-1; i>=0; --i) sa[--count[bucket[i]]] = y[i];

 31         t = x; x = y; y = t;

 32         p = 1; x[sa[0]] = 0;

 33         for(i=1; i<n; ++i)

 34             x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k] ? p-1:p++;

 35         m = p;

 36         

 37     }

 38 }

 39 void getHeight(int *r, int *sa, int n)

 40 {

 41     int i,j,k=0;

 42     for(i=1;i<=n; ++i) rank[sa[i]] = i;

 43     for(i=0; i<n;height[rank[i++]]=k)

 44         for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);

 45         

 46 }

 47 

 48 bool check(int k, int *sa, int n)

 49 {

 50     int i,minStart = n+1,maxStart = -1;

 51     for(i=1; i<=n; ++i)

 52     {

 53         if(height[i] < k)

 54         {

 55             minStart = n+1;

 56             maxStart = -1;

 57         }

 58         else

 59         {

 60             minStart = min(minStart,min(sa[i],sa[i-1]));

 61             maxStart = max(maxStart,max(sa[i],sa[i-1]));

 62             if(maxStart - minStart >=k)

 63                 return true;

 64         }

 65     }

 66     return false;

 67 }

 68 int main()

 69 {

 70     int s[N],r[N],sa[N],n,i;

 71     while(scanf("%d",&n)!=EOF&&n)

 72     {

 73         memset(r,0,sizeof(r));

 74         for(i=0; i<n; ++i)

 75             scanf("%d",&s[i]);

 76         if(n<10)

 77         {

 78             printf("0\n");

 79             continue;

 80         }

 81         for(i=1; i<n; ++i)

 82             r[i-1] = s[i] - s[i-1] + 88;

 83         r[n--] = 0;

 84         build_sa(r, sa, n+1, 200);

 85         getHeight(r, sa, n);

 86         

 87         int L = 4, R = n/2+1,ans=0;

 88         while(L<=R)

 89         {

 90             int mid = (L+R)/2;

 91             if(check(mid,sa,n))

 92             {

 93                 ans = mid;

 94                 L = mid + 1;

 95             }

 96             else

 97                 R = mid - 1;

 98         }

 99         if(ans>=4)

100             printf("%d\n",ans+1);

101         else

102             printf("0\n");

103     }

104     

105     return 0;

106 }

107 

108 

109 //二分答案是指答案不好直接确定,是一个判定性的问题,就可以在满足条件的范围内二分枚举可能的答案,然后就答案进行判定
View Code

 

4、给定一个字符串,问重复k次的最长重复子串的长度,重复子串的长度肯定在[0,n]之间,所以我们可以二分枚举可能的答案(设为len),然后判定是否可行

可行的因素是存在k个连续height[i]>=len。

时间复杂度为O(nlogn)

 

 poj3261

 1 #include <stdio.h>

 2 #include <string.h>

 3 const int N = 20000 + 10;

 4 int bucket[N],wa[N],wb[N],rank[N],height[N],sa[N],r[N];

 5 int count[N];

 6 

 7 void build_sa(int n, int m)

 8 {

 9     int *x=wa,*y=wb,*t,i,k,p;

10     for(i=0; i<m; ++i) count[i] = 0;

11     for(i=0; i<n; ++i) count[x[i] = r[i]]++;

12     for(i=1; i<m; ++i) count[i] += count[i-1];

13     for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i;

14     for(k=1; k<=n; k<<=1)

15     {

16         for(p=0,i=n-k;i<n; ++i) y[p++] = i;

17         for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k;

18         for(i=0; i<n; ++i) bucket[i] = x[y[i]];

19         for(i=0; i<m; ++i) count[i] = 0;

20         for(i=0; i<n; ++i) count[bucket[i]]++;

21         for(i=1; i<m; ++i) count[i] += count[i-1];

22         for(i=n-1; i>=0; --i) sa[--count[bucket[i]]] = y[i];

23         t = x; x = y; y = t;

24         p = 1; x[sa[0]] = 0;

25         for(i=1; i<n; ++i)

26             x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k] ? p-1:p++;

27         if(p>=n) break;

28         m = p;

29     }

30 }

31 void getHeight(int n)

32 {

33     int i,j,k=0;

34     for(i=0; i<=n; ++i) rank[sa[i]] = i;

35     for(i=0; i<n; height[rank[i++]]=k)

36         for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);

37 }

38 

39 bool check(int len, int n, int k)

40 {

41     int cnt = 0,i;

42     for(i=1; i<=n; ++i)

43     {

44         if(height[i] < len)

45         {

46             cnt = 0;

47         }

48         else

49         {

50             cnt ++;

51             if(cnt == k)

52                 return true;

53         }

54     }

55     return false;

56 }

57 int main()

58 {

59     int n,k,i;

60     scanf("%d%d",&n,&k);

61     for(i=0; i<n; ++i)

62         scanf("%d",&r[i]);

63     r[n] = 0;

64     build_sa(n+1,N+1);

65     getHeight(n);

66     int ans = 0,L=0,R=n;

67     while(L<=R)

68     {

69         int mid = (L+R)>>1;

70         if(check(mid,n,k-1))

71         {

72             ans = mid;

73             L = mid+1;

74         }

75         else

76             R = mid - 1;

77     }

78     printf("%d\n",ans);

79     return 0;

80 }
View Code

 

5、给定一个字符串,问最长的回文子串的长度,枚举每一位,然后计算以这一位为中心的最长回文子串,但是要注意奇数和偶数的情况。两种情况都可以转化为

求一个后缀和一个反过来写的后缀的最长公共前缀,即将aaab变为aaab1baaa0,求以i为中心最长回文串,当回文串长度为奇数时,计算suffix(i+1)和和suffix(n-i)的长度*2+1,即为回文串的长度,当回文串的长度为偶数时,计算suffix(i)和suffix(n-i)的长度*2,即为回文串的长度,枚举所有的i,即可计算出所有的回文字串的长度,选择最长的一个即可。

 ural1297

  1 #include <stdio.h>

  2 #include <string.h>

  3 const int N = 110000+110000 + 10;

  4 int rank[N],wa[N],wb[N],height[N],count[N],bucket[N],sa[N],r[N],dp[N][33];

  5 char str[N];

  6 

  7 int min(const int &a, const int &b)

  8 {

  9     return a < b ? a : b;

 10 }

 11 int max(const int &a, const int &b)

 12 {

 13     return a > b ? a : b;

 14 }

 15 void build_sa(int n, int m)

 16 {

 17     int *x=wa,*y=wb,*t,i,k,p;

 18     for(i=0; i<m; ++i) count[i] = 0;

 19     for(i=0; i<n; ++i) count[x[i]=r[i]]++;

 20     for(i=1; i<m; ++i) count[i] += count[i-1];

 21     for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i;

 22     for(k=1; k<=n; k<<=1)

 23     {

 24         for(p=0,i=n-k;i<n; ++i) y[p++] = i;

 25         for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k;

 26         for(i=0; i<n; ++i) bucket[i] = x[y[i]];

 27         for(i=0; i<m; ++i) count[i] = 0;

 28         for(i=0; i<n; ++i) count[bucket[i]]++;

 29         for(i=1; i<m; ++i) count[i] += count[i-1];

 30         for(i=n-1; i>=0; --i) sa[--count[bucket[i]]] = y[i];

 31         t = x; x = y; y = t;

 32         p = 1;x[sa[0]] = 0;

 33         for(i=1; i<n; ++i)

 34             x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k] ? p-1:p++;

 35         if(p>=n) break;

 36         m = p;//没这句就会runtime error,想不通

 37     }

 38 }

 39 void getHeight(int n)

 40 {

 41     int i,j,k=0;

 42     for(i=1; i<=n; ++i) rank[sa[i]] = i;

 43     for(i=0; i<n; height[rank[i++]]=k)

 44         for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);

 45 }

 46 void RMQ_init(int n)

 47 {

 48     int i,j;

 49     for(i=1; i<=n; ++i) dp[i][0] = height[i];

 50     for(j=1; (1<<j)<=n; ++j)

 51         for(i=1; i+(1<<j)-1<=n; ++i)

 52             dp[i][j] = min(dp[i][j-1],dp[i+(1<<(j-1))][j-1]);

 53 }

 54 

 55 int lcp(int i, int j)

 56 {

 57     int L = rank[i] + 1,R = rank[j];

 58     if(rank[i] > rank[j])

 59     {

 60         L = rank[j]+1;

 61         R = rank[i];

 62     }

 63     int k =0;

 64     while(1<<(k+1)<=R-L+1) k++;

 65     return min(dp[L][k],dp[R-(1<<k)+1][k]);

 66 }

 67 int main()

 68 {

 69     int n,i,j,nn;

 70     while(scanf("%s",str)!=EOF)

 71     {

 72         nn = n = strlen(str);

 73     

 74         for(i=0; i<n; ++i)

 75             r[i] = str[i] ;

 76         j = n - 1;

 77         i = n + 1;

 78         r[n] = 1;

 79         //n = 2*n+1;

 80         r[n*2+1] = 0;

 81         

 82         for(;j>=0;--j,++i)

 83             r[i] = str[j];

 84         build_sa(2*n+2,200);

 85         getHeight(2*n+1);

 86         int maxLen = -1;

 87         int ret = -1;

 88         int index;

 89         RMQ_init(n*2+1);

 90 

 91         

 92         for(i=0; i<n; ++i)

 93         {

 94             j = 2 * n - i + 1 ;

 95             ret = lcp(i+1,j) * 2 + 1;

 96             if(ret > maxLen)

 97             {

 98                 maxLen = ret;

 99                 index = i+1;

100             }

101             if(i>0)

102             {

103                 ret = lcp(i,j) * 2;

104                 if(ret > maxLen)

105                 {

106                     maxLen = ret;

107                     index = i;

108                 }

109             }

110         }

111         printf("%d\n",maxLen);

112     }

113     

114     /*

115     if(maxLen % 2 == 1)

116     {

117         for(i=index-maxLen/2-1;i<=index-1+maxLen/2;++i)

118             printf("%c",str[i]);

119     }

120     else

121     {

122         for(i=index-maxLen/2; i<=index-1+maxLen/2;++i)

123             printf("%c",str[i]);

124     }

125     puts("");*/

126         

127     

128 }
View Code

 

6、给定一个字符串,求不相同的子串的个数(distinct substrings),每个子串都是某个后缀的前缀,如果按照suffix(sa[1]),suffix(sa[2])...suffix(sa[n])的顺序计算,可以发现

对于加进来的每个后缀,它将产生n-sa[i]个前缀,但是其中后height[i]个前缀是重复的,所以对于suffix(sa[i])将贡献n-sa[i]+height[i]个子串,累加之后就是答案。

 spoj694

 1 #include <stdio.h>

 2 #include <string.h>

 3 const int N = 50000 + 10;

 4 int rank[N],sa[N],height[N],wa[N],wb[N],count[N],bucket[N],r[N];

 5 char str[N];

 6 

 7 void build_sa(int n, int m)

 8 {

 9     int *x=wa,*y=wb,*t,i,k,p;

10     for(i=0; i<m; ++i) count[i] = 0;

11     for(i=0; i<n; ++i) count[x[i] = r[i]]++;

12     for(i=1; i<m; ++i) count[i] += count[i-1];

13     for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i;

14     for(k=1; k<=n; k<<=1)

15     {

16         for(p=0,i=n-k; i<n; ++i) y[p++] = i;

17         for(i=0; i<n; ++i) if(sa[i] >= k) y[p++] = sa[i] - k;

18         for(i=0; i<n; ++i) bucket[i] = x[y[i]];

19         for(i=0; i<m; ++i) count[i] = 0;

20         for(i=0; i<n; ++i) count[bucket[i]]++;

21         for(i=1; i<m; ++i) count[i] += count[i-1];

22         for(i=n-1; i>=0; --i) sa[--count[bucket[i]]] = y[i];

23         t = x; x = y; y = t;

24         p =1 ;x[sa[0]] = 0;

25         for(i=1; i<n; ++i)

26             x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k] ? p-1 : p++;

27         m = p;

28     }

29 }

30 void getHeight(int n)

31 {

32     int i,j,k = 0;

33     for(i=1; i<=n; ++i) rank[sa[i]] = i;

34     for(i=0; i<n; height[rank[i++]]=k)

35         for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);

36 }

37 

38 int solve(int n)

39 {

40     int ans = 0,i;

41     for(i=1; i<=n; ++i)

42         ans += n - sa[i]  - height[i];

43     return ans;

44 }

45 int main()

46 {

47     int t,n,i;

48     scanf("%d",&t);

49     while(t--)

50     {

51         scanf("%s",str);

52         n = strlen(str);

53         for(i=0; i<n; ++i)

54             r[i] = str[i] ;

55         r[n] = 0;

56         build_sa(n+1,150);

57         getHeight(n);

58         printf("%d\n",solve(n));

59     }

60     return 0;

61 }
View Code

 

以下是两个字符串相关的问题:

7、给定2个字符串,求2个字符串的最长公共子串(longest  common substrings),可转换为求2个字符串后缀的最长公共前缀,将两个字符串连起来,中间用个没出现过的字符分割

求出height数组后,扫描出height数组中的最大值即可,当然,要求组sa[i]属于一个串,sa[i-1]属于另一个串。

 poj2774

 1 #include <stdio.h>

 2 #include <string.h>

 3 const int N = 100000+100000 + 10;

 4 int rank[N],wa[N],wb[N],height[N],count[N],bucket[N],sa[N],r[N];

 5 char str[N];

 6 

 7 

 8 void build_sa(int n, int m)

 9 {

10     int *x=wa,*y=wb,*t,i,k,p;

11     for(i=0; i<m; ++i) count[i] = 0;

12     for(i=0; i<n; ++i) count[x[i]=r[i]]++;

13     for(i=1; i<m; ++i) count[i] += count[i-1];

14     for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i;

15     for(k=1; k<=n; k<<=1)

16     {

17         for(p=0,i=n-k;i<n; ++i) y[p++] = i;

18         for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k;

19         for(i=0; i<n; ++i) bucket[i] = x[y[i]];

20         for(i=0; i<m; ++i) count[i] = 0;

21         for(i=0; i<n; ++i) count[bucket[i]]++;

22         for(i=1; i<m; ++i) count[i] += count[i-1];

23         for(i=n-1; i>=0; --i) sa[--count[bucket[i]]] = y[i];

24         t = x; x = y; y = t;

25         p = 1;x[sa[0]] = 0;

26         for(i=1; i<n; ++i)

27             x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k] ? p-1:p++;

28         if(p>=n) break;

29         m = p;//没这句就会runtime error,想不通

30     }

31 }

32 void getHeight(int n)

33 {

34     int i,j,k=0;

35     for(i=1; i<=n; ++i) rank[sa[i]] = i;

36     for(i=0; i<n; height[rank[i++]]=k)

37         for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);

38 }

39 

40 int solve(int n, int m)

41 {

42     int max = 0;

43     int i;

44     for(i=1; i<=n+m+1; ++i)

45     {

46         if(height[i] > max)

47         {

48             if(sa[i]>=n+1 && sa[i-1]<n)

49                 max = height[i];

50             else if(sa[i]<n && sa[i-1]>=n+1)

51                 max = height[i];

52         }

53     }

54     return max;

55 }

56 int main()

57 {

58     int n,m,j,i;

59     while(scanf("%s",str)!=EOF)

60     {

61         n = strlen(str);

62         for(i=0; i<n; ++i)

63             r[i] = str[i] - 'a' + 2;

64         r[n] = 1;

65         scanf("%s",str);

66         m = strlen(str);

67         for(j=0,i=n+1; i<=m+n; ++i,++j)

68             r[i] = str[j] - 'a' + 2;

69         r[n+m+1] = 0;

70         build_sa(n+m+2,30);

71         getHeight(n+m+1);

72         printf("%d\n",solve(n,m));

73     }

74 }
View Code

 

8、给定2个字符串,求出两个字符串所有公共子串中长度大于k的子串的个数,将两个字符串连起来,中间用个没出现过的字符分割

求出height数组后,将height[i]>k的分为一组,然后组内进行扫描,但是要用一个单调栈来维护后缀值。

 poj3415

  1 /*

  2 长度不小于k的公共子串的个数

  3 将连续的height[i]>=k的分为一组

  4 */#include <stdio.h>

  5 #include <string.h>

  6 typedef __int64 LL;

  7 const int N = 200005;

  8 int rank[N],r[N],height[N],sa[N],wa[N],wb[N],count[N];

  9 char str[N];

 10 int stack[N][2];

 11 int k;

 12 void build_sa(int n, int m)

 13 {

 14     int *x=wa,*y=wb,*t,i,p,k;

 15     for(i=0; i<m; ++i) count[i] = 0;

 16     for(i=0; i<n; ++i) count[x[i] = r[i]] ++;

 17     for(i=1; i<m; ++i) count[i] += count[i-1];

 18     for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i;

 19     for(k=1; k<=n; k<<=1)

 20     {

 21         for(p=0,i=n-k; i<n; ++i) y[p++] = i;

 22         for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k;

 23         for(i=0; i<m; ++i) count[i] = 0;

 24         for(i=0; i<n; ++i) count[x[y[i]]]++;

 25         for(i=1; i<m; ++i) count[i] += count[i-1];

 26         for(i=n-1; i>=0; --i) sa[--count[x[y[i]]]] = y[i];

 27         t = x; x = y; y = t;

 28         p = 1; x[sa[0]] = 0;

 29         for(i=1; i<n; ++i)

 30             x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k]?p-1:p++;

 31         m = p;

 32     }

 33 }

 34 void getHeight(int n)

 35 {

 36     int i,j,k = 0;

 37     for(i=1; i<=n; ++i) rank[sa[i]] = i;

 38     for(i=0; i<n; height[rank[i++]]=k)

 39         for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);

 40 

 41 }

 42 

 43 

 44 int main()

 45 {

 46     int n ,m, i,j;

 47     LL total,top,sum;

 48     while(scanf("%d",&k)!=EOF)

 49     {

 50         if(k==0)

 51             break;

 52         sum = 0;

 53         scanf("%s",str);

 54         n = strlen(str);

 55         for(i=0; i<n; ++i)

 56             r[i] = str[i] ;

 57         r[n] = 1;

 58         scanf("%s",str);

 59         m = strlen(str);

 60         for(i=n+1,j=0;j<m; ++j,++i)

 61             r[i] = str[j] ;

 62         r[n+m+1] = 0;

 63         build_sa(n+m+2,130);

 64         getHeight(n+m+1);

 65         total = 0,top = 0;

 66         for(i=1; i<=n+m+1; ++i)

 67         {

 68             if(height[i] < k) total = 0,top = 0;

 69             else

 70             {

 71                 int cnt = 0;

 72                 if(sa[i-1] < n) cnt++,total += height[i] - k + 1;

 73                 while(top>0 && height[i]<=stack[top-1][0])

 74                 {

 75                     top--;

 76                     total -= (stack[top][0] - height[i]) * stack[top][1];

 77                     cnt += stack[top][1];

 78                 }

 79                 stack[top][0] = height[i];stack[top++][1] = cnt;

 80                 if(sa[i] > n) sum += total;    

 81             }

 82         }

 83         total = 0,top = 0;

 84         for(i=1; i<=n+m+1; ++i)

 85         {

 86             if(height[i] < k) total = 0,top = 0;

 87             else

 88             {

 89                 int cnt = 0;

 90                 if(sa[i-1] > n) cnt++,total += height[i] - k + 1;

 91                 while(top>0 && height[i]<=stack[top-1][0])

 92                 {

 93                     top--;

 94                     total -= (stack[top][0] - height[i]) * stack[top][1];

 95                     cnt += stack[top][1];

 96                 }

 97                 stack[top][0] = height[i];stack[top++][1] = cnt;

 98                 if(sa[i] < n) sum += total;    

 99             }

100         }

101         printf("%I64d\n",sum);

102     }

103 }
View Code

 

9、给定n个字符串,求出最长公共子串,要求这个子串至少出现在k(k<=n)个字符串中,求出height数组后,二分答案,然后将height分组,然后判断组内的后缀是否属于k个以上的原字符串

 poj3294

  1 /*

  2 给定n个字符串,求最长子串的长度,要求该子串至少出现在k个字符串中

  3 二分答案,找到一组height[i]>=k且组中有k个以上的后缀属于不同的字符串

  4 */

  5 #include <stdio.h>

  6 #include <string.h>

  7 const int N = 200000+10;

  8 int rank[N],r[N],height[N],sa[N],wa[N],wb[N],count[N],mark[N],vis[111],index[111];

  9 char str[N];

 10 int k,c,cc;

 11 void build_sa(int n, int m)

 12 {

 13     int *x=wa,*y=wb,*t,i,p,k;

 14     for(i=0; i<m; ++i) 

 15         count[i] = 0;

 16 

 17     for(i=0; i<n; ++i) 

 18         count[x[i] = r[i]] ++;

 19     for(i=1; i<m; ++i) count[i] += count[i-1];

 20     for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i;

 21     for(k=1; k<=n; k<<=1)

 22     {

 23         for(p=0,i=n-k; i<n; ++i) y[p++] = i;

 24         for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k;

 25         for(i=0; i<m; ++i) count[i] = 0;

 26         for(i=0; i<n; ++i) count[x[y[i]]]++;

 27         for(i=1; i<m; ++i) count[i] += count[i-1];

 28         for(i=n-1; i>=0; --i) sa[--count[x[y[i]]]] = y[i];

 29         t = x; x = y; y = t;

 30         p = 1; x[sa[0]] = 0;

 31         for(i=1; i<n; ++i)

 32             x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k]?p-1:p++;

 33         m = p;

 34     }

 35 }

 36 void getHeight(int n)

 37 {

 38     int i,j,k = 0;

 39     for(i=1; i<=n; ++i) rank[sa[i]] = i;

 40     for(i=0; i<n; height[rank[i++]]=k)

 41         for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);

 42 

 43 }

 44 bool check(int n, int len)

 45 {

 46     memset(vis,0,sizeof(vis));

 47     int i;

 48     int cnt = 0;

 49     bool flag = false;

 50     c = 0;

 51     bool first = true;

 52     for(i=1; i<=n; ++i)

 53     {

 54         if(height[i] < len)

 55         {

 56             memset(vis,0,sizeof(vis));

 57             first = true;

 58             cnt = 0;

 59         }

 60         else

 61         {

 62             if(!vis[mark[sa[i-1]]])

 63                 vis[mark[sa[i-1]]]++,cnt++;

 64             if(!vis[mark[sa[i]]])

 65                 vis[mark[sa[i]]]++,cnt++;

 66             if(cnt>=k)

 67             {

 68                 if(first)

 69                     index[c++] = sa[i-1];

 70                 first = false;

 71                 flag = true;

 72             }    

 73         }

 74     }

 75     if(flag)    

 76     {

 77         cc = c;

 78         return true;

 79     }    

 80     

 81     return false;

 82 }

 83 int solve(int n)

 84 {

 85     int l = 0,r = n,mid;

 86     int ans = 0,i;

 87     while(l<=r)

 88     {

 89         mid = (l + r) >> 1;

 90         if(check(n,mid))

 91         {

 92             ans = mid;

 93             l = mid + 1;

 94         }

 95         else

 96             r = mid - 1;

 97     }

 98     return ans;

 99 }

100 int main()

101 {

102     freopen("in.txt","r",stdin);

103     int n,m,i,j,len;

104     int t = 0;

105     while(scanf("%d",&n),n)

106     {

107         memset(height,0,sizeof(height));

108         len = c = cc = 0;

109         memset(mark,0,sizeof(mark));

110         for(j=1; j<=n; ++j)

111         {

112             scanf("%s",str);

113             m = strlen(str);

114             for(i=0;i<m; ++i)

115             {

116                 r[len] = str[i] + 100;

117                 mark[len++] = j;

118             }

119             r[len++] = n-j;

120         }

121         build_sa(len,300);

122         getHeight(len-1);

123         k = n/2+1;

124         t++;

125         if(t!=1)

126             puts("");

127         int max = solve(len);

128         if(max!=0)

129         {

130             for(i=0; i<cc; ++i)

131             {

132                 for(j=0; j<max; ++j)

133                     printf("%c",r[j+index[i]]-100);

134                 puts("");

135             }    

136         }

137         else

138             puts("?");

139         

140     }

141 }
View Code

 

10、每个字符串中至少出现2次且不重叠的最长子串,求出height数组中,二分答案,然后将height分组,然后判断是否符号要求,怎么判断,根据上面的经验就可以知道了,不多说了。

spoj220

  1 /*http://www.spoj.com/problems/PHRASES/

  2 给定n个字符串,求最长子串的长度,要求该子串至少出现在k个字符串中

  3 二分答案,找到一组height[i]>=k且组中有k个以上的后缀属于不同的字符串

  4 */

  5 #include <stdio.h>

  6 #include <string.h>

  7 const int N = 200000+10;

  8 const int INF = 1<<30;

  9 int rank[N],r[N],height[N],sa[N],wa[N],wb[N],count[N],mark[N],idx[N][11],cnt[11];

 10 char str[N];

 11 int m;

 12 void build_sa(int n, int m)

 13 {

 14     int *x=wa,*y=wb,*t,i,p,k;

 15     for(i=0; i<m; ++i) 

 16         count[i] = 0;

 17 

 18     for(i=0; i<n; ++i) 

 19         count[x[i] = r[i]] ++;

 20     for(i=1; i<m; ++i) count[i] += count[i-1];

 21     for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i;

 22     for(k=1; k<=n; k<<=1)

 23     {

 24         for(p=0,i=n-k; i<n; ++i) y[p++] = i;

 25         for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k;

 26         for(i=0; i<m; ++i) count[i] = 0;

 27         for(i=0; i<n; ++i) count[x[y[i]]]++;

 28         for(i=1; i<m; ++i) count[i] += count[i-1];

 29         for(i=n-1; i>=0; --i) sa[--count[x[y[i]]]] = y[i];

 30         t = x; x = y; y = t;

 31         p = 1; x[sa[0]] = 0;

 32         for(i=1; i<n; ++i)

 33             x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k]?p-1:p++;

 34         m = p;

 35     }

 36 }

 37 void getHeight(int n)

 38 {

 39     int i,j,k = 0;

 40     for(i=1; i<=n; ++i) rank[sa[i]] = i;

 41     for(i=0; i<n; height[rank[i++]]=k)

 42         for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);

 43 

 44 }

 45 

 46 bool check(int n, int len)

 47 {

 48     int i,j,k,mn,mx;

 49     int is_ok = 0;

 50     memset(cnt,0,sizeof(cnt));

 51     for(k=1; k<=n; ++k)

 52     {

 53         if(height[k] < len)

 54         {

 55             

 56             for(i=1; i<=m; ++i)

 57             {

 58                 mn = INF;mx = 0;

 59                 for(j=0; j<cnt[i]; ++j)

 60                 {

 61                     if(idx[j][i] > mx)

 62                         mx = idx[j][i];

 63                     if(idx[j][i] < mn)

 64                         mn = idx[j][i];

 65                 }

 66                 if(mx - mn >= len)

 67                     is_ok++;

 68                     

 69             }

 70             if(is_ok==m)

 71                 break;

 72             is_ok = 0;

 73             memset(cnt,0,sizeof(cnt));

 74         }

 75         else

 76         {

 77             idx[cnt[mark[sa[k]]]++][mark[sa[k]]] = sa[k];

 78             idx[cnt[mark[sa[k-1]]]++][mark[sa[k-1]]] = sa[k-1];

 79             

 80         }

 81     }

 82     if(is_ok==m)

 83         return true;

 84     return false;

 85 }

 86 int solve(int n)

 87 {

 88     int l = 0,r = n, mid;

 89     int ans = 0;

 90     while(l<=r)

 91     {

 92         mid = (l+r)>>1;

 93         if(check(n,mid))

 94         {

 95             ans = mid;

 96             l = mid + 1;

 97         }

 98         else

 99             r = mid - 1;

100     }

101     return ans;

102 }

103 int main()

104 {

105     

106     int t,n,i,j,len;

107     scanf("%d",&t);

108     while(t--)

109     {

110         n = 0;

111         memset(mark,0,sizeof(mark));

112         scanf("%d",&m);

113         for(i=1; i<=m; ++i)

114         {

115             scanf("%s",str);

116             len = strlen(str);

117             for(j=0; j<len; ++j)

118             {

119                 r[n] = str[j];

120                 mark[n++] = i;

121             }

122             r[n++] = m - i;

123         }

124         build_sa(n,130);

125         getHeight(n-1);

126         printf("%d\n",solve(n));

127     }

128 }
View Code

 

你可能感兴趣的:(后缀数组)