height[i] 表示排名第i的后缀和排名第i-1的后缀的最长公共前缀,也即sa[i]与sa[i-1]的最长公共前缀。
1、给定一个字符串,询问任意两个后缀的最长公共前缀,假设询问suffix(i)和suffix(j)的最长公共前缀,且rank[i] < rank[j],那么suffix(i)和suffix(j)的最长公共前缀的
长度为height[rank[i]+1],height[rank[i]+2]....height[rank[j]]的最小值,即一次RMQ(rank[i]+1,rank[j]);
RMQ的初始化为O(nlogn),询问为O(1)
2、给定一个字符串,问最长的重复子串的长度是多少(子串可重叠),重复字串的长度可转化为某两个后缀的最长公共前缀,任意两个后缀的最长公共前缀
相当于height数组中某一段的最小值,那么这个值一定不大于height数组中的最大值,所以最长重复子串的长度就是height数组的最大值
时间复杂度为O(n)
3、给定一个字符串,问最长的重复子串的长度是多少(子串不可重叠)。重复子串的长度肯定在[0,n]之间,所以我们可以二分枚举可能的答案,然后判定是否可行。
设答案为k,那么将连续的height[i]>=k的分为一组,这样组内,任意两个后缀重复长度才会>=k,然后判断组内最大的sa[i]与最小的sa[i]的差值是否>=k
时间复杂度为O(nlogn)
poj1743
1 #include <stdio.h> 2 #include <string.h> 3 const int N = 20000 + 10; 4 int rank[N],wb[N],bucket[N],height[N],count[N]; 5 int max(const int &a, const int &b) 6 { 7 return a > b ? a : b; 8 } 9 int min(const int &a, const int &b) 10 { 11 return a < b ? a : b; 12 } 13 void build_sa(int *r, int *sa, int n, int m) 14 { 15 memset(wb,0,sizeof(0)); 16 memset(rank,0,sizeof(rank)); 17 int *x=rank,*y=wb,*t,i,k,p; 18 for(i=0; i<m; ++i) count[i] = 0; 19 for(i=0; i<n; ++i) count[x[i] = r[i]]++; 20 for(i=1; i<m; ++i) count[i] += count[i-1]; 21 for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i; 22 for(k=1; k<=n; k<<=1) 23 { 24 for(p=0,i=n-k; i<n; ++i) y[p++] = i; 25 for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k; 26 for(i=0; i<n; ++i) bucket[i] = x[y[i]]; 27 for(i=0; i<m; ++i) count[i] = 0; 28 for(i=0; i<n; ++i) count[bucket[i]]++; 29 for(i=1; i<m; ++i) count[i] += count[i-1]; 30 for(i=n-1; i>=0; --i) sa[--count[bucket[i]]] = y[i]; 31 t = x; x = y; y = t; 32 p = 1; x[sa[0]] = 0; 33 for(i=1; i<n; ++i) 34 x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k] ? p-1:p++; 35 m = p; 36 37 } 38 } 39 void getHeight(int *r, int *sa, int n) 40 { 41 int i,j,k=0; 42 for(i=1;i<=n; ++i) rank[sa[i]] = i; 43 for(i=0; i<n;height[rank[i++]]=k) 44 for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++); 45 46 } 47 48 bool check(int k, int *sa, int n) 49 { 50 int i,minStart = n+1,maxStart = -1; 51 for(i=1; i<=n; ++i) 52 { 53 if(height[i] < k) 54 { 55 minStart = n+1; 56 maxStart = -1; 57 } 58 else 59 { 60 minStart = min(minStart,min(sa[i],sa[i-1])); 61 maxStart = max(maxStart,max(sa[i],sa[i-1])); 62 if(maxStart - minStart >=k) 63 return true; 64 } 65 } 66 return false; 67 } 68 int main() 69 { 70 int s[N],r[N],sa[N],n,i; 71 while(scanf("%d",&n)!=EOF&&n) 72 { 73 memset(r,0,sizeof(r)); 74 for(i=0; i<n; ++i) 75 scanf("%d",&s[i]); 76 if(n<10) 77 { 78 printf("0\n"); 79 continue; 80 } 81 for(i=1; i<n; ++i) 82 r[i-1] = s[i] - s[i-1] + 88; 83 r[n--] = 0; 84 build_sa(r, sa, n+1, 200); 85 getHeight(r, sa, n); 86 87 int L = 4, R = n/2+1,ans=0; 88 while(L<=R) 89 { 90 int mid = (L+R)/2; 91 if(check(mid,sa,n)) 92 { 93 ans = mid; 94 L = mid + 1; 95 } 96 else 97 R = mid - 1; 98 } 99 if(ans>=4) 100 printf("%d\n",ans+1); 101 else 102 printf("0\n"); 103 } 104 105 return 0; 106 } 107 108 109 //二分答案是指答案不好直接确定,是一个判定性的问题,就可以在满足条件的范围内二分枚举可能的答案,然后就答案进行判定
4、给定一个字符串,问重复k次的最长重复子串的长度,重复子串的长度肯定在[0,n]之间,所以我们可以二分枚举可能的答案(设为len),然后判定是否可行
可行的因素是存在k个连续height[i]>=len。
时间复杂度为O(nlogn)
poj3261
1 #include <stdio.h> 2 #include <string.h> 3 const int N = 20000 + 10; 4 int bucket[N],wa[N],wb[N],rank[N],height[N],sa[N],r[N]; 5 int count[N]; 6 7 void build_sa(int n, int m) 8 { 9 int *x=wa,*y=wb,*t,i,k,p; 10 for(i=0; i<m; ++i) count[i] = 0; 11 for(i=0; i<n; ++i) count[x[i] = r[i]]++; 12 for(i=1; i<m; ++i) count[i] += count[i-1]; 13 for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i; 14 for(k=1; k<=n; k<<=1) 15 { 16 for(p=0,i=n-k;i<n; ++i) y[p++] = i; 17 for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k; 18 for(i=0; i<n; ++i) bucket[i] = x[y[i]]; 19 for(i=0; i<m; ++i) count[i] = 0; 20 for(i=0; i<n; ++i) count[bucket[i]]++; 21 for(i=1; i<m; ++i) count[i] += count[i-1]; 22 for(i=n-1; i>=0; --i) sa[--count[bucket[i]]] = y[i]; 23 t = x; x = y; y = t; 24 p = 1; x[sa[0]] = 0; 25 for(i=1; i<n; ++i) 26 x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k] ? p-1:p++; 27 if(p>=n) break; 28 m = p; 29 } 30 } 31 void getHeight(int n) 32 { 33 int i,j,k=0; 34 for(i=0; i<=n; ++i) rank[sa[i]] = i; 35 for(i=0; i<n; height[rank[i++]]=k) 36 for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++); 37 } 38 39 bool check(int len, int n, int k) 40 { 41 int cnt = 0,i; 42 for(i=1; i<=n; ++i) 43 { 44 if(height[i] < len) 45 { 46 cnt = 0; 47 } 48 else 49 { 50 cnt ++; 51 if(cnt == k) 52 return true; 53 } 54 } 55 return false; 56 } 57 int main() 58 { 59 int n,k,i; 60 scanf("%d%d",&n,&k); 61 for(i=0; i<n; ++i) 62 scanf("%d",&r[i]); 63 r[n] = 0; 64 build_sa(n+1,N+1); 65 getHeight(n); 66 int ans = 0,L=0,R=n; 67 while(L<=R) 68 { 69 int mid = (L+R)>>1; 70 if(check(mid,n,k-1)) 71 { 72 ans = mid; 73 L = mid+1; 74 } 75 else 76 R = mid - 1; 77 } 78 printf("%d\n",ans); 79 return 0; 80 }
5、给定一个字符串,问最长的回文子串的长度,枚举每一位,然后计算以这一位为中心的最长回文子串,但是要注意奇数和偶数的情况。两种情况都可以转化为
求一个后缀和一个反过来写的后缀的最长公共前缀,即将aaab变为aaab1baaa0,求以i为中心最长回文串,当回文串长度为奇数时,计算suffix(i+1)和和suffix(n-i)的长度*2+1,即为回文串的长度,当回文串的长度为偶数时,计算suffix(i)和suffix(n-i)的长度*2,即为回文串的长度,枚举所有的i,即可计算出所有的回文字串的长度,选择最长的一个即可。
ural1297
1 #include <stdio.h> 2 #include <string.h> 3 const int N = 110000+110000 + 10; 4 int rank[N],wa[N],wb[N],height[N],count[N],bucket[N],sa[N],r[N],dp[N][33]; 5 char str[N]; 6 7 int min(const int &a, const int &b) 8 { 9 return a < b ? a : b; 10 } 11 int max(const int &a, const int &b) 12 { 13 return a > b ? a : b; 14 } 15 void build_sa(int n, int m) 16 { 17 int *x=wa,*y=wb,*t,i,k,p; 18 for(i=0; i<m; ++i) count[i] = 0; 19 for(i=0; i<n; ++i) count[x[i]=r[i]]++; 20 for(i=1; i<m; ++i) count[i] += count[i-1]; 21 for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i; 22 for(k=1; k<=n; k<<=1) 23 { 24 for(p=0,i=n-k;i<n; ++i) y[p++] = i; 25 for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k; 26 for(i=0; i<n; ++i) bucket[i] = x[y[i]]; 27 for(i=0; i<m; ++i) count[i] = 0; 28 for(i=0; i<n; ++i) count[bucket[i]]++; 29 for(i=1; i<m; ++i) count[i] += count[i-1]; 30 for(i=n-1; i>=0; --i) sa[--count[bucket[i]]] = y[i]; 31 t = x; x = y; y = t; 32 p = 1;x[sa[0]] = 0; 33 for(i=1; i<n; ++i) 34 x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k] ? p-1:p++; 35 if(p>=n) break; 36 m = p;//没这句就会runtime error,想不通 37 } 38 } 39 void getHeight(int n) 40 { 41 int i,j,k=0; 42 for(i=1; i<=n; ++i) rank[sa[i]] = i; 43 for(i=0; i<n; height[rank[i++]]=k) 44 for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++); 45 } 46 void RMQ_init(int n) 47 { 48 int i,j; 49 for(i=1; i<=n; ++i) dp[i][0] = height[i]; 50 for(j=1; (1<<j)<=n; ++j) 51 for(i=1; i+(1<<j)-1<=n; ++i) 52 dp[i][j] = min(dp[i][j-1],dp[i+(1<<(j-1))][j-1]); 53 } 54 55 int lcp(int i, int j) 56 { 57 int L = rank[i] + 1,R = rank[j]; 58 if(rank[i] > rank[j]) 59 { 60 L = rank[j]+1; 61 R = rank[i]; 62 } 63 int k =0; 64 while(1<<(k+1)<=R-L+1) k++; 65 return min(dp[L][k],dp[R-(1<<k)+1][k]); 66 } 67 int main() 68 { 69 int n,i,j,nn; 70 while(scanf("%s",str)!=EOF) 71 { 72 nn = n = strlen(str); 73 74 for(i=0; i<n; ++i) 75 r[i] = str[i] ; 76 j = n - 1; 77 i = n + 1; 78 r[n] = 1; 79 //n = 2*n+1; 80 r[n*2+1] = 0; 81 82 for(;j>=0;--j,++i) 83 r[i] = str[j]; 84 build_sa(2*n+2,200); 85 getHeight(2*n+1); 86 int maxLen = -1; 87 int ret = -1; 88 int index; 89 RMQ_init(n*2+1); 90 91 92 for(i=0; i<n; ++i) 93 { 94 j = 2 * n - i + 1 ; 95 ret = lcp(i+1,j) * 2 + 1; 96 if(ret > maxLen) 97 { 98 maxLen = ret; 99 index = i+1; 100 } 101 if(i>0) 102 { 103 ret = lcp(i,j) * 2; 104 if(ret > maxLen) 105 { 106 maxLen = ret; 107 index = i; 108 } 109 } 110 } 111 printf("%d\n",maxLen); 112 } 113 114 /* 115 if(maxLen % 2 == 1) 116 { 117 for(i=index-maxLen/2-1;i<=index-1+maxLen/2;++i) 118 printf("%c",str[i]); 119 } 120 else 121 { 122 for(i=index-maxLen/2; i<=index-1+maxLen/2;++i) 123 printf("%c",str[i]); 124 } 125 puts("");*/ 126 127 128 }
6、给定一个字符串,求不相同的子串的个数(distinct substrings),每个子串都是某个后缀的前缀,如果按照suffix(sa[1]),suffix(sa[2])...suffix(sa[n])的顺序计算,可以发现
对于加进来的每个后缀,它将产生n-sa[i]个前缀,但是其中后height[i]个前缀是重复的,所以对于suffix(sa[i])将贡献n-sa[i]+height[i]个子串,累加之后就是答案。
spoj694
1 #include <stdio.h> 2 #include <string.h> 3 const int N = 50000 + 10; 4 int rank[N],sa[N],height[N],wa[N],wb[N],count[N],bucket[N],r[N]; 5 char str[N]; 6 7 void build_sa(int n, int m) 8 { 9 int *x=wa,*y=wb,*t,i,k,p; 10 for(i=0; i<m; ++i) count[i] = 0; 11 for(i=0; i<n; ++i) count[x[i] = r[i]]++; 12 for(i=1; i<m; ++i) count[i] += count[i-1]; 13 for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i; 14 for(k=1; k<=n; k<<=1) 15 { 16 for(p=0,i=n-k; i<n; ++i) y[p++] = i; 17 for(i=0; i<n; ++i) if(sa[i] >= k) y[p++] = sa[i] - k; 18 for(i=0; i<n; ++i) bucket[i] = x[y[i]]; 19 for(i=0; i<m; ++i) count[i] = 0; 20 for(i=0; i<n; ++i) count[bucket[i]]++; 21 for(i=1; i<m; ++i) count[i] += count[i-1]; 22 for(i=n-1; i>=0; --i) sa[--count[bucket[i]]] = y[i]; 23 t = x; x = y; y = t; 24 p =1 ;x[sa[0]] = 0; 25 for(i=1; i<n; ++i) 26 x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k] ? p-1 : p++; 27 m = p; 28 } 29 } 30 void getHeight(int n) 31 { 32 int i,j,k = 0; 33 for(i=1; i<=n; ++i) rank[sa[i]] = i; 34 for(i=0; i<n; height[rank[i++]]=k) 35 for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++); 36 } 37 38 int solve(int n) 39 { 40 int ans = 0,i; 41 for(i=1; i<=n; ++i) 42 ans += n - sa[i] - height[i]; 43 return ans; 44 } 45 int main() 46 { 47 int t,n,i; 48 scanf("%d",&t); 49 while(t--) 50 { 51 scanf("%s",str); 52 n = strlen(str); 53 for(i=0; i<n; ++i) 54 r[i] = str[i] ; 55 r[n] = 0; 56 build_sa(n+1,150); 57 getHeight(n); 58 printf("%d\n",solve(n)); 59 } 60 return 0; 61 }
以下是两个字符串相关的问题:
7、给定2个字符串,求2个字符串的最长公共子串(longest common substrings),可转换为求2个字符串后缀的最长公共前缀,将两个字符串连起来,中间用个没出现过的字符分割
求出height数组后,扫描出height数组中的最大值即可,当然,要求组sa[i]属于一个串,sa[i-1]属于另一个串。
poj2774
1 #include <stdio.h> 2 #include <string.h> 3 const int N = 100000+100000 + 10; 4 int rank[N],wa[N],wb[N],height[N],count[N],bucket[N],sa[N],r[N]; 5 char str[N]; 6 7 8 void build_sa(int n, int m) 9 { 10 int *x=wa,*y=wb,*t,i,k,p; 11 for(i=0; i<m; ++i) count[i] = 0; 12 for(i=0; i<n; ++i) count[x[i]=r[i]]++; 13 for(i=1; i<m; ++i) count[i] += count[i-1]; 14 for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i; 15 for(k=1; k<=n; k<<=1) 16 { 17 for(p=0,i=n-k;i<n; ++i) y[p++] = i; 18 for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k; 19 for(i=0; i<n; ++i) bucket[i] = x[y[i]]; 20 for(i=0; i<m; ++i) count[i] = 0; 21 for(i=0; i<n; ++i) count[bucket[i]]++; 22 for(i=1; i<m; ++i) count[i] += count[i-1]; 23 for(i=n-1; i>=0; --i) sa[--count[bucket[i]]] = y[i]; 24 t = x; x = y; y = t; 25 p = 1;x[sa[0]] = 0; 26 for(i=1; i<n; ++i) 27 x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k] ? p-1:p++; 28 if(p>=n) break; 29 m = p;//没这句就会runtime error,想不通 30 } 31 } 32 void getHeight(int n) 33 { 34 int i,j,k=0; 35 for(i=1; i<=n; ++i) rank[sa[i]] = i; 36 for(i=0; i<n; height[rank[i++]]=k) 37 for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++); 38 } 39 40 int solve(int n, int m) 41 { 42 int max = 0; 43 int i; 44 for(i=1; i<=n+m+1; ++i) 45 { 46 if(height[i] > max) 47 { 48 if(sa[i]>=n+1 && sa[i-1]<n) 49 max = height[i]; 50 else if(sa[i]<n && sa[i-1]>=n+1) 51 max = height[i]; 52 } 53 } 54 return max; 55 } 56 int main() 57 { 58 int n,m,j,i; 59 while(scanf("%s",str)!=EOF) 60 { 61 n = strlen(str); 62 for(i=0; i<n; ++i) 63 r[i] = str[i] - 'a' + 2; 64 r[n] = 1; 65 scanf("%s",str); 66 m = strlen(str); 67 for(j=0,i=n+1; i<=m+n; ++i,++j) 68 r[i] = str[j] - 'a' + 2; 69 r[n+m+1] = 0; 70 build_sa(n+m+2,30); 71 getHeight(n+m+1); 72 printf("%d\n",solve(n,m)); 73 } 74 }
8、给定2个字符串,求出两个字符串所有公共子串中长度大于k的子串的个数,将两个字符串连起来,中间用个没出现过的字符分割
求出height数组后,将height[i]>k的分为一组,然后组内进行扫描,但是要用一个单调栈来维护后缀值。
poj3415
1 /* 2 长度不小于k的公共子串的个数 3 将连续的height[i]>=k的分为一组 4 */#include <stdio.h> 5 #include <string.h> 6 typedef __int64 LL; 7 const int N = 200005; 8 int rank[N],r[N],height[N],sa[N],wa[N],wb[N],count[N]; 9 char str[N]; 10 int stack[N][2]; 11 int k; 12 void build_sa(int n, int m) 13 { 14 int *x=wa,*y=wb,*t,i,p,k; 15 for(i=0; i<m; ++i) count[i] = 0; 16 for(i=0; i<n; ++i) count[x[i] = r[i]] ++; 17 for(i=1; i<m; ++i) count[i] += count[i-1]; 18 for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i; 19 for(k=1; k<=n; k<<=1) 20 { 21 for(p=0,i=n-k; i<n; ++i) y[p++] = i; 22 for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k; 23 for(i=0; i<m; ++i) count[i] = 0; 24 for(i=0; i<n; ++i) count[x[y[i]]]++; 25 for(i=1; i<m; ++i) count[i] += count[i-1]; 26 for(i=n-1; i>=0; --i) sa[--count[x[y[i]]]] = y[i]; 27 t = x; x = y; y = t; 28 p = 1; x[sa[0]] = 0; 29 for(i=1; i<n; ++i) 30 x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k]?p-1:p++; 31 m = p; 32 } 33 } 34 void getHeight(int n) 35 { 36 int i,j,k = 0; 37 for(i=1; i<=n; ++i) rank[sa[i]] = i; 38 for(i=0; i<n; height[rank[i++]]=k) 39 for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++); 40 41 } 42 43 44 int main() 45 { 46 int n ,m, i,j; 47 LL total,top,sum; 48 while(scanf("%d",&k)!=EOF) 49 { 50 if(k==0) 51 break; 52 sum = 0; 53 scanf("%s",str); 54 n = strlen(str); 55 for(i=0; i<n; ++i) 56 r[i] = str[i] ; 57 r[n] = 1; 58 scanf("%s",str); 59 m = strlen(str); 60 for(i=n+1,j=0;j<m; ++j,++i) 61 r[i] = str[j] ; 62 r[n+m+1] = 0; 63 build_sa(n+m+2,130); 64 getHeight(n+m+1); 65 total = 0,top = 0; 66 for(i=1; i<=n+m+1; ++i) 67 { 68 if(height[i] < k) total = 0,top = 0; 69 else 70 { 71 int cnt = 0; 72 if(sa[i-1] < n) cnt++,total += height[i] - k + 1; 73 while(top>0 && height[i]<=stack[top-1][0]) 74 { 75 top--; 76 total -= (stack[top][0] - height[i]) * stack[top][1]; 77 cnt += stack[top][1]; 78 } 79 stack[top][0] = height[i];stack[top++][1] = cnt; 80 if(sa[i] > n) sum += total; 81 } 82 } 83 total = 0,top = 0; 84 for(i=1; i<=n+m+1; ++i) 85 { 86 if(height[i] < k) total = 0,top = 0; 87 else 88 { 89 int cnt = 0; 90 if(sa[i-1] > n) cnt++,total += height[i] - k + 1; 91 while(top>0 && height[i]<=stack[top-1][0]) 92 { 93 top--; 94 total -= (stack[top][0] - height[i]) * stack[top][1]; 95 cnt += stack[top][1]; 96 } 97 stack[top][0] = height[i];stack[top++][1] = cnt; 98 if(sa[i] < n) sum += total; 99 } 100 } 101 printf("%I64d\n",sum); 102 } 103 }
9、给定n个字符串,求出最长公共子串,要求这个子串至少出现在k(k<=n)个字符串中,求出height数组后,二分答案,然后将height分组,然后判断组内的后缀是否属于k个以上的原字符串
poj3294
1 /* 2 给定n个字符串,求最长子串的长度,要求该子串至少出现在k个字符串中 3 二分答案,找到一组height[i]>=k且组中有k个以上的后缀属于不同的字符串 4 */ 5 #include <stdio.h> 6 #include <string.h> 7 const int N = 200000+10; 8 int rank[N],r[N],height[N],sa[N],wa[N],wb[N],count[N],mark[N],vis[111],index[111]; 9 char str[N]; 10 int k,c,cc; 11 void build_sa(int n, int m) 12 { 13 int *x=wa,*y=wb,*t,i,p,k; 14 for(i=0; i<m; ++i) 15 count[i] = 0; 16 17 for(i=0; i<n; ++i) 18 count[x[i] = r[i]] ++; 19 for(i=1; i<m; ++i) count[i] += count[i-1]; 20 for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i; 21 for(k=1; k<=n; k<<=1) 22 { 23 for(p=0,i=n-k; i<n; ++i) y[p++] = i; 24 for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k; 25 for(i=0; i<m; ++i) count[i] = 0; 26 for(i=0; i<n; ++i) count[x[y[i]]]++; 27 for(i=1; i<m; ++i) count[i] += count[i-1]; 28 for(i=n-1; i>=0; --i) sa[--count[x[y[i]]]] = y[i]; 29 t = x; x = y; y = t; 30 p = 1; x[sa[0]] = 0; 31 for(i=1; i<n; ++i) 32 x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k]?p-1:p++; 33 m = p; 34 } 35 } 36 void getHeight(int n) 37 { 38 int i,j,k = 0; 39 for(i=1; i<=n; ++i) rank[sa[i]] = i; 40 for(i=0; i<n; height[rank[i++]]=k) 41 for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++); 42 43 } 44 bool check(int n, int len) 45 { 46 memset(vis,0,sizeof(vis)); 47 int i; 48 int cnt = 0; 49 bool flag = false; 50 c = 0; 51 bool first = true; 52 for(i=1; i<=n; ++i) 53 { 54 if(height[i] < len) 55 { 56 memset(vis,0,sizeof(vis)); 57 first = true; 58 cnt = 0; 59 } 60 else 61 { 62 if(!vis[mark[sa[i-1]]]) 63 vis[mark[sa[i-1]]]++,cnt++; 64 if(!vis[mark[sa[i]]]) 65 vis[mark[sa[i]]]++,cnt++; 66 if(cnt>=k) 67 { 68 if(first) 69 index[c++] = sa[i-1]; 70 first = false; 71 flag = true; 72 } 73 } 74 } 75 if(flag) 76 { 77 cc = c; 78 return true; 79 } 80 81 return false; 82 } 83 int solve(int n) 84 { 85 int l = 0,r = n,mid; 86 int ans = 0,i; 87 while(l<=r) 88 { 89 mid = (l + r) >> 1; 90 if(check(n,mid)) 91 { 92 ans = mid; 93 l = mid + 1; 94 } 95 else 96 r = mid - 1; 97 } 98 return ans; 99 } 100 int main() 101 { 102 freopen("in.txt","r",stdin); 103 int n,m,i,j,len; 104 int t = 0; 105 while(scanf("%d",&n),n) 106 { 107 memset(height,0,sizeof(height)); 108 len = c = cc = 0; 109 memset(mark,0,sizeof(mark)); 110 for(j=1; j<=n; ++j) 111 { 112 scanf("%s",str); 113 m = strlen(str); 114 for(i=0;i<m; ++i) 115 { 116 r[len] = str[i] + 100; 117 mark[len++] = j; 118 } 119 r[len++] = n-j; 120 } 121 build_sa(len,300); 122 getHeight(len-1); 123 k = n/2+1; 124 t++; 125 if(t!=1) 126 puts(""); 127 int max = solve(len); 128 if(max!=0) 129 { 130 for(i=0; i<cc; ++i) 131 { 132 for(j=0; j<max; ++j) 133 printf("%c",r[j+index[i]]-100); 134 puts(""); 135 } 136 } 137 else 138 puts("?"); 139 140 } 141 }
10、每个字符串中至少出现2次且不重叠的最长子串,求出height数组中,二分答案,然后将height分组,然后判断是否符号要求,怎么判断,根据上面的经验就可以知道了,不多说了。
spoj220
1 /*http://www.spoj.com/problems/PHRASES/ 2 给定n个字符串,求最长子串的长度,要求该子串至少出现在k个字符串中 3 二分答案,找到一组height[i]>=k且组中有k个以上的后缀属于不同的字符串 4 */ 5 #include <stdio.h> 6 #include <string.h> 7 const int N = 200000+10; 8 const int INF = 1<<30; 9 int rank[N],r[N],height[N],sa[N],wa[N],wb[N],count[N],mark[N],idx[N][11],cnt[11]; 10 char str[N]; 11 int m; 12 void build_sa(int n, int m) 13 { 14 int *x=wa,*y=wb,*t,i,p,k; 15 for(i=0; i<m; ++i) 16 count[i] = 0; 17 18 for(i=0; i<n; ++i) 19 count[x[i] = r[i]] ++; 20 for(i=1; i<m; ++i) count[i] += count[i-1]; 21 for(i=n-1; i>=0; --i) sa[--count[x[i]]] = i; 22 for(k=1; k<=n; k<<=1) 23 { 24 for(p=0,i=n-k; i<n; ++i) y[p++] = i; 25 for(i=0; i<n; ++i) if(sa[i]>=k) y[p++] = sa[i] - k; 26 for(i=0; i<m; ++i) count[i] = 0; 27 for(i=0; i<n; ++i) count[x[y[i]]]++; 28 for(i=1; i<m; ++i) count[i] += count[i-1]; 29 for(i=n-1; i>=0; --i) sa[--count[x[y[i]]]] = y[i]; 30 t = x; x = y; y = t; 31 p = 1; x[sa[0]] = 0; 32 for(i=1; i<n; ++i) 33 x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k]?p-1:p++; 34 m = p; 35 } 36 } 37 void getHeight(int n) 38 { 39 int i,j,k = 0; 40 for(i=1; i<=n; ++i) rank[sa[i]] = i; 41 for(i=0; i<n; height[rank[i++]]=k) 42 for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++); 43 44 } 45 46 bool check(int n, int len) 47 { 48 int i,j,k,mn,mx; 49 int is_ok = 0; 50 memset(cnt,0,sizeof(cnt)); 51 for(k=1; k<=n; ++k) 52 { 53 if(height[k] < len) 54 { 55 56 for(i=1; i<=m; ++i) 57 { 58 mn = INF;mx = 0; 59 for(j=0; j<cnt[i]; ++j) 60 { 61 if(idx[j][i] > mx) 62 mx = idx[j][i]; 63 if(idx[j][i] < mn) 64 mn = idx[j][i]; 65 } 66 if(mx - mn >= len) 67 is_ok++; 68 69 } 70 if(is_ok==m) 71 break; 72 is_ok = 0; 73 memset(cnt,0,sizeof(cnt)); 74 } 75 else 76 { 77 idx[cnt[mark[sa[k]]]++][mark[sa[k]]] = sa[k]; 78 idx[cnt[mark[sa[k-1]]]++][mark[sa[k-1]]] = sa[k-1]; 79 80 } 81 } 82 if(is_ok==m) 83 return true; 84 return false; 85 } 86 int solve(int n) 87 { 88 int l = 0,r = n, mid; 89 int ans = 0; 90 while(l<=r) 91 { 92 mid = (l+r)>>1; 93 if(check(n,mid)) 94 { 95 ans = mid; 96 l = mid + 1; 97 } 98 else 99 r = mid - 1; 100 } 101 return ans; 102 } 103 int main() 104 { 105 106 int t,n,i,j,len; 107 scanf("%d",&t); 108 while(t--) 109 { 110 n = 0; 111 memset(mark,0,sizeof(mark)); 112 scanf("%d",&m); 113 for(i=1; i<=m; ++i) 114 { 115 scanf("%s",str); 116 len = strlen(str); 117 for(j=0; j<len; ++j) 118 { 119 r[n] = str[j]; 120 mark[n++] = i; 121 } 122 r[n++] = m - i; 123 } 124 build_sa(n,130); 125 getHeight(n-1); 126 printf("%d\n",solve(n)); 127 } 128 }