后缀数组学习笔记

后缀数组的用处:快速求出两个后缀Suffix(i), Suffix(j)的最长公共前缀(LCP, Longest Common Perfix)

后缀数组的应用

先提出后缀数组的几种常用技巧:

  1. 建议多找找与height数组的关联。
  2. 将几个字符串贴在一起,用特殊符号间隔开:如aab与aaab,可合并成aab$aaab。
  3. 二分+分组(思想)的方法:枚举出答案后,就能将合法的情况划分到一个组判断。

以下列举出几个后缀数组的应用供大家思考。

  1. 求两个后缀的最长公共前缀
  2. 求字符串的可重叠的最长重复子串:如ababa可重叠的最长重复子串是aba
  3. 求字符串的不可重叠的最长重复子串:如ababa不可重叠的最长重复子串是ab 
  4. 计算不相同子串的个数:如aaaa的不相同子串数是4
  5. 计算最长回文子串:如aabaaaab的最长回文子串是6(baaaab)。
  6. 求两个字符串的最长公共子串:如aaba与abac的最长公共子串是aba。

以下一张图片可谓简洁明了。

后缀数组学习笔记_第1张图片

 

下面贴上模板

1.求最长重复子串,可以重叠

void solve_duplicate_substr(int n){//duplicate available

2.求最长重复子串,不可重叠

void slove_update_duplicate_substr(int n){//duplicate unavailable

  

 Source Code:

  1 //#pragma comment(linker, "/STACK:16777216") //for c++ Compiler
  2 #include <stdio.h>
  3 #include <iostream>
  4 #include <fstream>
  5 #include <cstring>
  6 #include <cmath>
  7 #include <stack>
  8 #include <string>
  9 #include <map>
 10 #include <set>
 11 #include <list>
 12 #include <queue>
 13 #include <vector>
 14 #include <algorithm>
 15 #define Max(a,b) (((a) > (b)) ? (a) : (b))
 16 #define Min(a,b) (((a) < (b)) ? (a) : (b))
 17 #define Abs(x) (((x) > 0) ? (x) : (-(x)))
 18 #define MOD 1000000007
 19 #define pi acos(-1.0)
 20 
 21 using namespace std;
 22 
 23 typedef long long           ll      ;
 24 typedef unsigned long long  ull     ;
 25 typedef unsigned int        uint    ;
 26 typedef unsigned char       uchar   ;
 27 
 28 template<class T> inline void checkmin(T &a,T b){if(a>b) a=b;}
 29 template<class T> inline void checkmax(T &a,T b){if(a<b) a=b;}
 30 
 31 const double eps = 1e-7      ;
 32 const int N = 1              ;
 33 const int M = 200000         ;
 34 const ll P = 10000000097ll   ;
 35 const int INF = 0x3f3f3f3f   ;
 36 
 37 int a[M], sa[M], h[M], rank[M];
 38 
 39 void radix(int *str, int *a, int *b, int n, int m){
 40     static int count[M];
 41     int i;
 42     memset(count, 0, sizeof(count));
 43     for(i = 0; i < n; ++i)  ++count[str[a[i]]];
 44     for(i = 1; i <= m; ++i) count[i] += count[i - 1];
 45     for(i = n - 1; i >= 0; --i) b[--count[str[a[i]]]] = a[i];
 46 }
 47 
 48 void suffix_array(int *str, int *sa, int n, int m){
 49     static int a[M], b[M];
 50     int i, j;
 51     for(i = 0; i < n; ++i)  rank[i] = i;
 52     radix(str, rank, sa, n, m);
 53 
 54     rank[sa[0]] = 0;
 55     for(i = 1; i < n; ++i)  rank[sa[i]] = rank[sa[i - 1]] + (str[sa[i]] != str[sa[i - 1]]);
 56     for(i = 0; 1<<i < n; ++i){
 57         for(j = 0; j < n; ++j){
 58             a[j] = rank[j] + 1;
 59             b[j] = j + (1 << i) >= n ? 0 : rank[j + (1 << i)] + 1;
 60             sa[j] = j;
 61         }
 62         radix(b, sa, rank, n, n);
 63         radix(a, rank, sa, n, n);
 64         rank[sa[0]] = 0;
 65         for(j = 1; j < n; ++j){
 66             rank[sa[j]] = rank[sa[j - 1]] + (a[sa[j - 1]] != a[sa[j]] || b[sa[j - 1]] != b[sa[j]]);
 67         }
 68     }
 69 }
 70 
 71 void calc_height(int *str, int *sa, int *h, int n){
 72     int i, k = 0;
 73     h[0] = 0;
 74     for(i = 0; i < n; ++i){
 75         k = k == 0 ? 0 : k - 1;
 76         if(rank[i] != 0){
 77             while(str[i + k] == str[sa[rank[i] - 1] + k]){
 78                 ++k;
 79             }
 80         }
 81         h[rank[i]] = k;
 82     }
 83 }
 84 
 85 void solve_duplicate_substr(int n){//duplicate available
 86     int i, j, pos, ans = 0;
 87     for(i = 0; i < n; ++i){
 88         if(h[rank[i]] > ans){
 89             ans = h[rank[i]];
 90             pos = i;
 91         }
 92     }
 93     for(i = pos; i < pos + ans; ++i){
 94         printf("%c", a[i]);
 95     }
 96     printf("\n");
 97 }
 98 
 99 void slove_update_duplicate_substr(int n){//duplicate unavailable
100     int i, j;
101     int low = 1, high = n;
102     int ans = 0, pos1 = 0, pos2 = 0;
103     while(low <= high){
104         int mid = (low + high) / 2;
105         bool flag = false;
106         for(i = 0; i < n; ){
107             j = i + 1;
108             int minPos = sa[i], maxPos = sa[i];
109             while(j < n && h[j] >= mid){
110                 minPos = min(minPos, sa[j]);
111                 maxPos = max(maxPos, sa[j]);
112                 ++j;
113             }
114             if(maxPos - minPos >= mid){
115                 flag = true;
116                 if(mid > ans){
117                     ans = mid;
118                     pos1 = minPos;
119                     pos2 = maxPos;
120                 }
121                 break;
122             }
123             i = j;
124         }
125         if(flag)    low = mid + 1;
126         else    high = mid - 1;
127     }
128     for(i = pos1; i < pos1 + ans; ++i){
129         printf("%c", a[i]);
130     }
131     printf("\n");
132 }
133 
134 int main(){
135     int i, j, t, n, m, k;
136     string str;
137     while(cin >> str){
138         copy(str.begin(), str.end(), a);
139         n = str.length();
140         suffix_array(a, sa, n, 256);
141         calc_height(a, sa, h, n);
142         solve_duplicate_substr(n);
143         slove_update_duplicate_substr(n);
144     }
145     return 0;
146 }

 

你可能感兴趣的:(学习笔记)