后缀数组的用处:快速求出两个后缀Suffix(i), Suffix(j)的最长公共前缀(LCP, Longest Common Perfix)
先提出后缀数组的几种常用技巧:
以下列举出几个后缀数组的应用供大家思考。
以下一张图片可谓简洁明了。
下面贴上模板
1.求最长重复子串,可以重叠
void solve_duplicate_substr(int n){//duplicate available
2.求最长重复子串,不可重叠
void slove_update_duplicate_substr(int n){//duplicate unavailable
Source Code:
1 //#pragma comment(linker, "/STACK:16777216") //for c++ Compiler 2 #include <stdio.h> 3 #include <iostream> 4 #include <fstream> 5 #include <cstring> 6 #include <cmath> 7 #include <stack> 8 #include <string> 9 #include <map> 10 #include <set> 11 #include <list> 12 #include <queue> 13 #include <vector> 14 #include <algorithm> 15 #define Max(a,b) (((a) > (b)) ? (a) : (b)) 16 #define Min(a,b) (((a) < (b)) ? (a) : (b)) 17 #define Abs(x) (((x) > 0) ? (x) : (-(x))) 18 #define MOD 1000000007 19 #define pi acos(-1.0) 20 21 using namespace std; 22 23 typedef long long ll ; 24 typedef unsigned long long ull ; 25 typedef unsigned int uint ; 26 typedef unsigned char uchar ; 27 28 template<class T> inline void checkmin(T &a,T b){if(a>b) a=b;} 29 template<class T> inline void checkmax(T &a,T b){if(a<b) a=b;} 30 31 const double eps = 1e-7 ; 32 const int N = 1 ; 33 const int M = 200000 ; 34 const ll P = 10000000097ll ; 35 const int INF = 0x3f3f3f3f ; 36 37 int a[M], sa[M], h[M], rank[M]; 38 39 void radix(int *str, int *a, int *b, int n, int m){ 40 static int count[M]; 41 int i; 42 memset(count, 0, sizeof(count)); 43 for(i = 0; i < n; ++i) ++count[str[a[i]]]; 44 for(i = 1; i <= m; ++i) count[i] += count[i - 1]; 45 for(i = n - 1; i >= 0; --i) b[--count[str[a[i]]]] = a[i]; 46 } 47 48 void suffix_array(int *str, int *sa, int n, int m){ 49 static int a[M], b[M]; 50 int i, j; 51 for(i = 0; i < n; ++i) rank[i] = i; 52 radix(str, rank, sa, n, m); 53 54 rank[sa[0]] = 0; 55 for(i = 1; i < n; ++i) rank[sa[i]] = rank[sa[i - 1]] + (str[sa[i]] != str[sa[i - 1]]); 56 for(i = 0; 1<<i < n; ++i){ 57 for(j = 0; j < n; ++j){ 58 a[j] = rank[j] + 1; 59 b[j] = j + (1 << i) >= n ? 0 : rank[j + (1 << i)] + 1; 60 sa[j] = j; 61 } 62 radix(b, sa, rank, n, n); 63 radix(a, rank, sa, n, n); 64 rank[sa[0]] = 0; 65 for(j = 1; j < n; ++j){ 66 rank[sa[j]] = rank[sa[j - 1]] + (a[sa[j - 1]] != a[sa[j]] || b[sa[j - 1]] != b[sa[j]]); 67 } 68 } 69 } 70 71 void calc_height(int *str, int *sa, int *h, int n){ 72 int i, k = 0; 73 h[0] = 0; 74 for(i = 0; i < n; ++i){ 75 k = k == 0 ? 0 : k - 1; 76 if(rank[i] != 0){ 77 while(str[i + k] == str[sa[rank[i] - 1] + k]){ 78 ++k; 79 } 80 } 81 h[rank[i]] = k; 82 } 83 } 84 85 void solve_duplicate_substr(int n){//duplicate available 86 int i, j, pos, ans = 0; 87 for(i = 0; i < n; ++i){ 88 if(h[rank[i]] > ans){ 89 ans = h[rank[i]]; 90 pos = i; 91 } 92 } 93 for(i = pos; i < pos + ans; ++i){ 94 printf("%c", a[i]); 95 } 96 printf("\n"); 97 } 98 99 void slove_update_duplicate_substr(int n){//duplicate unavailable 100 int i, j; 101 int low = 1, high = n; 102 int ans = 0, pos1 = 0, pos2 = 0; 103 while(low <= high){ 104 int mid = (low + high) / 2; 105 bool flag = false; 106 for(i = 0; i < n; ){ 107 j = i + 1; 108 int minPos = sa[i], maxPos = sa[i]; 109 while(j < n && h[j] >= mid){ 110 minPos = min(minPos, sa[j]); 111 maxPos = max(maxPos, sa[j]); 112 ++j; 113 } 114 if(maxPos - minPos >= mid){ 115 flag = true; 116 if(mid > ans){ 117 ans = mid; 118 pos1 = minPos; 119 pos2 = maxPos; 120 } 121 break; 122 } 123 i = j; 124 } 125 if(flag) low = mid + 1; 126 else high = mid - 1; 127 } 128 for(i = pos1; i < pos1 + ans; ++i){ 129 printf("%c", a[i]); 130 } 131 printf("\n"); 132 } 133 134 int main(){ 135 int i, j, t, n, m, k; 136 string str; 137 while(cin >> str){ 138 copy(str.begin(), str.end(), a); 139 n = str.length(); 140 suffix_array(a, sa, n, 256); 141 calc_height(a, sa, h, n); 142 solve_duplicate_substr(n); 143 slove_update_duplicate_substr(n); 144 } 145 return 0; 146 }