曾经有几次想写后缀数组,结果被论文中缩成豆腐块的程序吓到了.......
现在看懂了之后, 被这个程序写的丑陋吓到了。
丑虽丑,但是写的还是很精巧的, 照着打吧。
后缀数组其实就是把一个长度为n的字符串(当然也可以是其他的,只要能比大小)的所有后缀排序后存在数组里。
打裸是O(n*n*log(n))的。
其实如果以每个点为开头,分别求出向后延伸1, 2, 4 .... 的排列顺序,后者是可以由前者推出的,配合基数排序,就可以把复杂度降低到O(n log n);
当然, 代码很精巧,也很丑。
把排序后各后缀的起始位置放在sa数组中, 用sa推出height (height[i] = suffix(sa[i]) 与 suffix(sa[i-1]) 的最长公共前缀), 有这么一个定理 height[rank[i]] >= height[rank[i-1]]-1 , 按照原串下标 i = 1..n 的顺序来求, 用变量存上一个位置求出的height, 由于这个变量顶多减少n次, 类似kmp的复杂度分析,这是O(n)的。
用height就可以求很多东西了。
poj 1743 求不可重叠最长重复子串。
求出height后,二分答案,把height数组分成若干份, 每份内的height都不小于二分的数,观察每份内是否存在解即可。
nlogn
当然,这道题规定,如果一段数同时减去一个数后, 与另一段数相同,也算作相同,这里就要用差分思想解决了。
// POJ 1743: longest repeated, non-overlapping theme, where two themes also
// count as equal when one is a transposition (constant shift) of the other.
//
// Approach: replace the notes by their consecutive differences (this makes
// transposed themes literally equal), build a suffix array with prefix
// doubling + counting sort, derive the height (LCP) array, then binary search
// the answer: for a candidate length `mid`, split the suffix array into maximal
// runs whose heights are all >= mid and look at the smallest and largest start
// position inside each run.
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

const int maxn = 100000, oo = 1073741819;

// All arrays are 1-based.
int pret[maxn];    // counting-sort buckets
int s[maxn];       // shifted note differences followed by a 0 sentinel
int height[maxn];  // height[i] = LCP of the suffixes sa[i-1] and sa[i]
int sa[maxn];      // sa[i] = start position of the i-th smallest suffix
int stmp[maxn];    // first-key rank of the suffixes listed in lis
int lis[maxn];     // suffixes ordered by second key; later the previous ranks
int rnk[maxn];     // rnk[p] = index of suffix p inside sa (named rnk, not rank,
                   // so it can never collide with std::rank)
int rk[maxn];      // equivalence-class rank used while doubling
int n;             // current sequence length; -1 signals "no more test cases"

// Reads one test case and turns it into n-1 shifted differences in s[1..n-1].
// Leaves n == -1 when the terminating 0 is read, when input ends, or when the
// header is unusable, so that main() stops instead of spinning on a dead stream.
void read() {
    std::memset(s, 0, sizeof(s));
    std::memset(sa, 0, sizeof(sa));
    // n >= maxn / 2 is rejected because the doubling step indexes up to 2n.
    if (std::scanf("%d", &n) != 1 || n <= 0 || n >= maxn / 2) {
        n = -1;
        return;
    }
    for (int i = 1; i <= n; i++) {
        if (std::scanf("%d", &s[i]) != 1) {
            n = -1;
            return;
        }
    }
    for (int i = 1; i < n; i++) s[i] = s[i + 1] - s[i];
    s[n] = 0;
    n--;
    // Shift so every symbol is strictly positive; 0 stays free for the sentinel.
    for (int i = 1; i <= n; i++) s[i] += 89;
}

// Builds sa[1..n] for s[1..n] by prefix doubling.
// n: length including the unique smallest sentinel at s[n].
// m: upper bound on the symbol values (all symbols must lie in [0, m]).
void work_suffix(int n, int m) {
    // Round 0: counting sort by the first symbol.
    std::memset(pret, 0, sizeof(pret[0]) * (m + 1));
    for (int i = 1; i <= n; i++) pret[rk[i] = s[i]]++;
    for (int i = 1; i <= m; i++) pret[i] += pret[i - 1];
    for (int i = n; i >= 1; i--) sa[pret[rk[i]]--] = i;

    int classes = 0;
    for (int j = 1; classes < n; j <<= 1, m = classes) {
        // Order suffixes by their second key (the rank of the suffix j further
        // on). The last j suffixes have an empty second key and come first.
        int cnt = 0;
        for (int i = n - j + 1; i <= n; i++) lis[++cnt] = i;
        for (int i = 1; i <= n; i++)
            if (sa[i] > j) lis[++cnt] = sa[i] - j;

        // Stable counting sort by the first key. Ranks never exceed m, so only
        // that prefix of the bucket array needs clearing.
        std::memset(pret, 0, sizeof(pret[0]) * (m + 1));
        for (int i = 1; i <= n; i++) stmp[i] = rk[lis[i]];
        for (int i = 1; i <= n; i++) pret[stmp[i]]++;
        for (int i = 1; i <= m; i++) pret[i] += pret[i - 1];
        for (int i = n; i >= 1; i--) sa[pret[stmp[i]]--] = lis[i];

        // Re-rank: lis now holds the previous ranks. Two suffixes stay in the
        // same class only if both halves agree. The unique sentinel guarantees
        // that equal first halves imply sa[i] + j <= n.
        for (int i = 1; i <= n; i++) lis[i] = rk[i];
        classes = 1;
        rk[sa[1]] = 1;
        for (int i = 2; i <= n; i++) {
            const bool same = lis[sa[i]] == lis[sa[i - 1]] &&
                              lis[sa[i] + j] == lis[sa[i - 1] + j];
            rk[sa[i]] = same ? classes : ++classes;
        }
    }
}

// Fills rnk[] and height[] from sa[] in text order. The running LCP drops by
// at most one per step.
void work_height() {
    for (int i = 1; i <= n; i++) rnk[sa[i]] = i;
    height[1] = 0;  // the sentinel suffix has no predecessor
    int p = 0;
    for (int i = 1; i < n; i++) {  // i == n is the sentinel (rank 1): skipped
        if (p) p--;
        const int j = sa[rnk[i] - 1];
        while (j + p <= n + 1 && i + p <= n + 1 && s[j + p] == s[i + p]) p++;
        height[rnk[i]] = p;
    }
}

// Returns true when some run of suffixes sharing a common prefix of length
// >= mid contains two start positions more than mid apart.
// A run of mid differences at position a covers notes a..a+mid, so two themes
// are disjoint only if their starts differ by at least mid + 1. The original
// accepted a distance of exactly mid, which lets the themes share one note.
bool check(int mid) {
    int lo = oo, hi = -oo;
    for (int i = 1; i <= n; i++) {
        if (height[i] < mid) {
            lo = oo;
            hi = -oo;
        }
        if (sa[i] < lo) lo = sa[i];
        if (sa[i] > hi) hi = sa[i];
        if (hi - lo > mid) return true;
    }
    return false;
}

int main() {
    // Input and output are redirected to local files, as in the original.
    std::freopen("1743.in", "r", stdin);
    std::freopen("1743.out", "w", stdout);
    for (;;) {
        read();
        if (n == -1) break;
        s[++n] = 0;  // append the unique smallest sentinel
        work_suffix(n, 1000);
        work_height();
        int l = 0, r = n;
        while (l < r) {
            const int mid = (l + r + 1) >> 1;
            if (check(mid))
                l = mid;
            else
                r = mid - 1;
        }
        l++;  // l differences correspond to l + 1 notes
        std::printf("%d\n", l >= 5 ? l : 0);
    }
    return 0;
}
poj 3261 求至少重复k次的最长重复子串, 可重叠。
有木有发现和前面一道题是一样的二分检验?
// POJ 3261: length of the longest substring that occurs at least k times
// (occurrences may overlap).
//
// Approach: suffix array by prefix doubling, height (LCP) array, then binary
// search on the length: a length `mid` is feasible when some run of suffix-array
// entries with height >= mid contains at least k suffixes.
#include <cstdio>
#include <cstdlib>
#include <cstring>

const int oo = 1073741819, maxn = 30000;
const int maxv = 1000000 + 5;  // alphabet bound handed to work_suffix

// All arrays are 1-based.
int rnk[maxn];         // rnk[p] = index of suffix p inside sa (not "rank", to
                       // stay clear of std::rank)
int a[maxn];           // input values + 1, followed by a 0 sentinel
int pret[maxv + 1];    // counting-sort buckets; indices 0..maxv are all used
int rk[maxn];          // equivalence-class rank used while doubling
int sa[maxn];          // sa[i] = start of the i-th smallest suffix
int lis[maxn];         // suffixes ordered by second key; later previous ranks
int stmp[maxn];        // first-key rank of the suffixes listed in lis
int height[maxn];      // height[i] = LCP of suffixes sa[i-1] and sa[i]
int n, k;

// Reads n, k and the n values, shifts every value by +1 and appends a 0
// sentinel (so n ends up one larger than the input length).
// Assumes the values lie in [0, 1000000]. Missing or unusable input yields an
// empty sequence (only the sentinel).
void read() {
    if (std::scanf("%d%d", &n, &k) != 2 || n < 0 || n > maxn - 2) {
        n = 0;
        k = 0;
    }
    for (int i = 1; i <= n; i++) {
        if (std::scanf("%d", &a[i]) != 1) {
            n = i - 1;  // keep only what was actually read
            break;
        }
        a[i]++;
    }
    a[++n] = 0;
}

// Builds sa[1..n] for a[1..n] by prefix doubling.
// n: length including the unique smallest sentinel at a[n].
// m: upper bound on the symbol values; pret must have at least m + 1 entries.
void work_suffix(int n, int m) {
    // Round 0: counting sort by the first symbol.
    for (int i = 0; i <= m; i++) pret[i] = 0;
    for (int i = 1; i <= n; i++) pret[rk[i] = a[i]]++;
    for (int i = 1; i <= m; i++) pret[i] += pret[i - 1];
    for (int i = n; i >= 1; i--) sa[pret[rk[i]]--] = i;

    int classes = 0;
    for (int j = 1; classes < n; j <<= 1, m = classes) {
        // Second key order: suffixes whose second half is empty come first.
        int cnt = 0;
        for (int i = n - j + 1; i <= n; i++) lis[++cnt] = i;
        for (int i = 1; i <= n; i++)
            if (sa[i] > j) lis[++cnt] = sa[i] - j;

        // Stable counting sort by the first key.
        for (int i = 0; i <= m; i++) pret[i] = 0;
        for (int i = 1; i <= n; i++) stmp[i] = rk[lis[i]];
        for (int i = 1; i <= n; i++) pret[stmp[i]]++;
        for (int i = 1; i <= m; i++) pret[i] += pret[i - 1];
        for (int i = n; i >= 1; i--) sa[pret[stmp[i]]--] = lis[i];

        // Re-rank using the previous ranks (copied into lis). The unique
        // sentinel guarantees sa[i] + j <= n whenever the first halves match.
        for (int i = 1; i <= n; i++) lis[i] = rk[i];
        classes = 1;
        rk[sa[1]] = 1;
        for (int i = 2; i <= n; i++) {
            const bool same = lis[sa[i]] == lis[sa[i - 1]] &&
                              lis[sa[i] + j] == lis[sa[i - 1] + j];
            rk[sa[i]] = same ? classes : ++classes;
        }
    }
}

// Returns true when some run of suffixes with pairwise LCP >= mid holds at
// least k suffixes.
bool check(int mid) {
    int first = oo, last = -oo;  // index range of the current run inside sa
    for (int i = 1; i <= n; i++) {
        if (height[i] < mid) {
            first = oo;
            last = -oo;
        }
        if (i < first) first = i;
        if (i > last) last = i;
        if (last - first >= k - 1) return true;
    }
    return false;
}

// Fills rnk[] and height[] from sa[].
// The suffix of rank 1 has no predecessor: its height is fixed to 0 and it is
// skipped. The original looked up sa[0] for it and ended up with height[1] == 1,
// which let check(1) run on uninitialised bounds.
void work_height() {
    for (int i = 1; i <= n; i++) rnk[sa[i]] = i;
    height[1] = 0;
    int p = 0;
    for (int i = 1; i <= n; i++) {
        if (rnk[i] == 1) {
            p = 0;
            continue;
        }
        if (p) p--;
        const int j = sa[rnk[i] - 1];
        while (i + p <= n && j + p <= n && a[j + p] == a[i + p]) p++;
        height[rnk[i]] = p;
    }
}

int main() {
    // Input and output are redirected to local files, as in the original.
    std::freopen("3261.in","r", stdin);
    std::freopen("3261.out", "w", stdout);
    read();
    if (n <= 1) {  // nothing but the sentinel: no repeated substring exists
        std::printf("%d", 0);
        return 0;
    }
    work_suffix(n, maxv);
    work_height();
    int l = 0, r = n;
    while (l < r) {
        const int mid = (l + r + 1) >> 1;
        if (check(mid))
            l = mid;
        else
            r = mid - 1;
    }
    std::printf("%d", l);
    return 0;
}
poj 2406 给定字符串l,它是由子串s重复k次得到的,求k的最大值。
同样的求height, 枚举s的长度,首先strlen(l)% strlen(s) == 0, 其次, suffix(1)与suffix(1+strlen(s)) 的最长公共前缀 = n - strlen(s), 画个图就很明了了。
当然,这道题不是出给后缀数组的,是出给kmp的,数据出到了1000000, 倍增是过不了的, 只能用丑陋的DC3。
贴个kmp的算了:
// POJ 2406: for every input string print the largest k such that the string
// is some block repeated k times. Input ends with a line starting with '.'.
//
// Approach: KMP failure function. With n the length and nxt[n] the length of
// the longest proper border, n - nxt[n] is the smallest period candidate; the
// string is a whole number of repetitions exactly when that value divides n.
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

const int maxn = 1000000 + 5;

char s[maxn];   // current string, stored from s[1]
int nxt[maxn];  // nxt[i] = longest proper border of s[1..i]

int main() {
    //freopen("2406.in", "r", stdin);
    //freopen("2406.out", "w", stdout);
    for (;;) {
        // Stop on EOF as well as on the "." terminator. Without this check an
        // exhausted stream gave n == 0 and a division by zero below.
        // scanf NUL-terminates the token, so the buffer needs no pre-clearing.
        if (std::scanf("%s\n", s + 1) != 1) return 0;
        if (s[1] == '.') return 0;
        const int n = static_cast<int>(std::strlen(s + 1));

        nxt[1] = 0;
        for (int i = 2, j = 0; i <= n; i++) {
            while (j > 0 && s[j + 1] != s[i]) j = nxt[j];  // fall back on mismatch
            if (s[i] == s[j + 1]) j++;
            nxt[i] = j;
        }

        const int period = n - nxt[n];
        std::printf("%d\n", n % period == 0 ? n / period : 1);
    }
}
poj 3294
求:对于n个字符串,至少出现在 n div 2+1 个字符串中的最长子串。
把n个字符串用未出现且不同的字符连接, 同样的二分答案,分组,组内统计即可。
// POJ 3294: given n strings, print every longest substring that occurs in
// more than half of them (i.e. in at least n / 2 + 1 strings), or "?" if there
// is none.
//
// Approach: concatenate the strings with pairwise distinct separator symbols,
// build the suffix array and height (LCP) array, then binary search the length:
// for a candidate `mid`, split the suffix array into runs with height >= mid
// and count how many different source strings each run touches.
#include <cstdio>
#include <cstdlib>
#include <cstring>

const int maxn = 100000 + 5000;

char s[10000 + 5];  // one input word, stored from s[1]

// All arrays below are 1-based.
int height[maxn];  // height[i] = LCP of suffixes sa[i-1] and sa[i]
int pret[maxn];    // counting-sort buckets
int sa[maxn];      // sa[i] = start of the i-th smallest suffix
int rk[maxn];      // equivalence-class rank used while doubling
int rnk[maxn];     // rnk[p] = index of suffix p inside sa (not "rank", to stay
                   // clear of std::rank)
int stmp[maxn];    // first-key rank of the suffixes listed in lis
int lis[maxn];     // suffixes ordered by second key; later previous ranks
int a[maxn];       // concatenated text: letters 1..26, separators >= 30, 0 last
int id[maxn];      // id[p] = 1-based index of the string owning p, 0 otherwise

// ans[0] = number of hits, ans[1..ans[0]] = start positions. Sized by the text
// length: the number of qualifying runs is bounded by the number of suffixes,
// not by the number of strings.
int ans[maxn];
// mark[w] == current run number  <=>  string w was already seen in this run.
// Replaces a bool table that had to be wiped on every run boundary.
int mark[1000 + 5];
int n, key, tot;

// Reads one test case into a[1..tot]. Leaves n == 0 on the terminating 0 and
// on end of input, so main() stops instead of looping on a dead stream.
void read() {
    if (std::scanf("%d", &n) != 1 || n < 0) n = 0;
    key = 30;
    tot = 0;
    for (int i = 1; i <= n; i++) {
        if (std::scanf("%s", s + 1) != 1) {
            n = 0;
            return;
        }
        const int len = static_cast<int>(std::strlen(s + 1));
        for (int j = 1; j <= len; j++) {
            a[++tot] = s[j] - 'a' + 1;
            id[tot] = i;
        }
        a[++tot] = key++;  // a fresh separator after every word
        id[tot] = 0;
    }
    a[tot] = 0;  // the last separator becomes the unique smallest sentinel
}

// Builds sa[1..n] for a[1..n] by prefix doubling.
// n: length including the sentinel at a[n]; m: upper bound on symbol values.
void work_suffix(int n, int m) {
    for (int i = 0; i <= m; i++) pret[i] = 0;
    for (int i = 1; i <= n; i++) pret[rk[i] = a[i]]++;
    for (int i = 1; i <= m; i++) pret[i] += pret[i - 1];
    for (int i = n; i >= 1; i--) sa[pret[rk[i]]--] = i;

    int classes = 0;
    for (int j = 1; classes < n; j <<= 1, m = classes) {
        // Second key order: suffixes whose second half is empty come first.
        int cnt = 0;
        for (int i = n - j + 1; i <= n; i++) lis[++cnt] = i;
        for (int i = 1; i <= n; i++)
            if (sa[i] > j) lis[++cnt] = sa[i] - j;

        // Stable counting sort by the first key.
        for (int i = 0; i <= m; i++) pret[i] = 0;
        for (int i = 1; i <= n; i++) stmp[i] = rk[lis[i]];
        for (int i = 1; i <= n; i++) pret[stmp[i]]++;
        for (int i = 1; i <= m; i++) pret[i] += pret[i - 1];
        for (int i = n; i >= 1; i--) sa[pret[stmp[i]]--] = lis[i];

        // Re-rank using the previous ranks (copied into lis). The unique
        // sentinel guarantees sa[i] + j <= n whenever the first halves match.
        for (int i = 1; i <= n; i++) lis[i] = rk[i];
        classes = 1;
        rk[sa[1]] = 1;
        for (int i = 2; i <= n; i++) {
            const bool same = lis[sa[i]] == lis[sa[i - 1]] &&
                              lis[sa[i] + j] == lis[sa[i - 1] + j];
            rk[sa[i]] = same ? classes : ++classes;
        }
    }
}

// Fills rnk[] and height[] from sa[] for a text of length n.
// The suffix of rank 1 has no predecessor, so its height is fixed to 0 and it
// is skipped; comparisons are bounded by n so leftovers of an earlier, longer
// test case are never read.
void work_height(int n) {
    for (int i = 1; i <= n; i++) rnk[sa[i]] = i;
    height[1] = 0;
    int p = 0;
    for (int i = 1; i <= n; i++) {
        if (rnk[i] == 1) {
            p = 0;
            continue;
        }
        if (p) p--;
        const int j = sa[rnk[i] - 1];
        while (i + p <= n && j + p <= n && a[i + p] == a[j + p]) p++;
        height[rnk[i]] = p;
    }
}

// Returns true when a substring of length mid occurs in at least n / 2 + 1
// different strings. As a side effect (for mid > 0) ans[] receives one start
// position per qualifying run, in lexicographic order.
bool check(int mid) {
    if (mid == 0) return true;
    const int need = n / 2 + 1;
    ans[0] = 0;
    std::memset(mark, 0, sizeof(mark));
    int run = 1;   // number of the current run; 0 means "never seen"
    int seen = 0;  // distinct strings met in the current run
    for (int i = 1; i <= tot; i++) {
        if (height[i] < mid) {  // run boundary
            run++;
            seen = 0;
        }
        const int owner = id[sa[i]];
        if (owner != 0 && mark[owner] != run) {
            mark[owner] = run;
            // Record the run once, at the moment it reaches the threshold.
            if (++seen == need) ans[++ans[0]] = sa[i];
        }
    }
    return ans[0] != 0;
}

int main() {
    for (;;) {
        read();
        if (n == 0) break;
        // NOTE(review): a single string is answered with "?" and without the
        // trailing blank line, exactly as in the original - confirm intent.
        if (n == 1) {
            std::printf("?\n");
            continue;
        }
        work_suffix(tot, key + 1);
        work_height(tot);

        int l = 0, r = tot;
        while (l < r) {
            const int mid = (l + r + 1) >> 1;
            if (check(mid))
                l = mid;
            else
                r = mid - 1;
        }
        check(l);  // refill ans[] for the final length

        if (l == 0) {
            std::printf("?\n");
        } else {
            for (int i = 1; i <= ans[0]; i++) {
                for (int j = 1; j <= l; j++)
                    std::printf("%c", a[ans[i] + j - 1] + 'a' - 1);
                std::printf("\n");
            }
        }
        std::printf("\n");
    }
    return 0;
}
// POJ 2774: length of the longest common substring of two lowercase strings.
//
// Approach: join the two strings with a separator symbol, build the suffix
// array and height (LCP) array, and take the largest height between two
// neighbouring suffixes that are tagged differently.
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

const int maxn = 300000;

// All arrays are 1-based.
int height[maxn];  // height[i] = LCP of suffixes sa[i-1] and sa[i]
int id[maxn];      // 1 / 2 = position belongs to the first / second string,
                   // 0 = separator or sentinel
int pret[maxn];    // counting-sort buckets
int sa[maxn];      // sa[i] = start of the i-th smallest suffix
int stmp[maxn];    // first-key rank of the suffixes listed in lis
int rk[maxn];      // equivalence-class rank used while doubling
int a[maxn];       // text: letters 1..26, separator 30, sentinel 0
int lis[maxn];     // suffixes ordered by second key; later previous ranks
int rnk[maxn];     // rnk[p] = index of suffix p inside sa (not "rank", to stay
                   // clear of std::rank)
int tot;           // length of a[], including separator and sentinel
char s[maxn / 2];  // one input string, stored from s[1]

// Reads both strings into a[1..tot] as  first + 30 + second + 0.
// A string that cannot be read is treated as empty.
void read() {
    tot = 0;

    if (std::scanf("%s\n", s + 1) != 1) s[1] = '\0';
    int len = static_cast<int>(std::strlen(s + 1));
    for (int i = 1; i <= len; i++) {
        a[++tot] = s[i] - 'a' + 1;
        id[tot] = 1;
    }

    if (std::scanf("%s\n", s + 1) != 1) s[1] = '\0';
    len = static_cast<int>(std::strlen(s + 1));
    a[++tot] = 30;  // separator, larger than any letter code
    for (int i = 1; i <= len; i++) {
        a[++tot] = s[i] - 'a' + 1;
        id[tot] = 2;
    }

    a[++tot] = 0;  // unique smallest sentinel
}

// Builds sa[1..n] for a[1..n] by prefix doubling.
// n: length including the sentinel at a[n]; m: upper bound on symbol values.
void work_suffix(int n, int m) {
    for (int i = 0; i <= m; i++) pret[i] = 0;
    for (int i = 1; i <= n; i++) pret[rk[i] = a[i]]++;
    for (int i = 1; i <= m; i++) pret[i] += pret[i - 1];
    for (int i = n; i >= 1; i--) sa[pret[rk[i]]--] = i;

    int classes = 0;
    for (int j = 1; classes < n; j <<= 1, m = classes) {
        // Second key order: suffixes whose second half is empty come first.
        int cnt = 0;
        for (int i = n - j + 1; i <= n; i++) lis[++cnt] = i;
        for (int i = 1; i <= n; i++)
            if (sa[i] > j) lis[++cnt] = sa[i] - j;

        // Stable counting sort by the first key.
        for (int i = 0; i <= m; i++) pret[i] = 0;
        for (int i = 1; i <= n; i++) stmp[i] = rk[lis[i]];
        for (int i = 1; i <= n; i++) pret[stmp[i]]++;
        for (int i = 1; i <= m; i++) pret[i] += pret[i - 1];
        for (int i = n; i >= 1; i--) sa[pret[stmp[i]]--] = lis[i];

        // Re-rank using the previous ranks (copied into lis). The unique
        // sentinel guarantees sa[i] + j <= n whenever the first halves match.
        for (int i = 1; i <= n; i++) lis[i] = rk[i];
        classes = 1;
        rk[sa[1]] = 1;
        for (int i = 2; i <= n; i++) {
            const bool same = lis[sa[i]] == lis[sa[i - 1]] &&
                              lis[sa[i] + j] == lis[sa[i - 1] + j];
            rk[sa[i]] = same ? classes : ++classes;
        }
    }
}

// Fills rnk[] and height[] from sa[] for a text of length n.
// Position n is the sentinel (rank 1, no predecessor) and is not processed;
// its height is set to 0 explicitly.
void work_height(int n) {
    for (int i = 1; i <= n; i++) rnk[sa[i]] = i;
    height[1] = 0;
    int p = 0;
    for (int i = 1; i < n; i++) {
        if (p) p--;
        const int j = sa[rnk[i] - 1];
        while (i + p <= n && j + p <= n && a[i + p] == a[j + p]) p++;
        height[rnk[i]] = p;
    }
}

int main() {
    // Input and output are redirected to local files, as in the original.
    std::freopen("2774.in", "r", stdin);
    std::freopen("2774.out", "w", stdout);
    read();
    work_suffix(tot, 50);
    work_height(tot);

    // Neighbours in the suffix array that carry different tags. The separator
    // and sentinel suffixes (tag 0) share no prefix with any other suffix, so
    // they contribute 0.
    int ans = 0;
    for (int i = 2; i <= tot; i++)
        if (id[sa[i]] != id[sa[i - 1]] && height[i] > ans) ans = height[i];

    std::printf("%d", ans);
    return 0;
}