智商太低,花了半个多月总算把后缀数组那篇论文的题目刷完了
写个总结方便自己以后再深入学习,也希望能够帮助正在学习后缀数组的童鞋
首先ORZ 《后缀数组——处理字符串的有力工具》-罗穗骞
本文也是依照着这篇论文,加上自己的理解。
有需要的童鞋自己去下载吧~~~
首先必须先理解一下这三个数组(len 表示 字符串长度)
特别需要理解的是sa数组和height数组,每每做后缀数组的题目没有思路的时候,先把sa和height数组打出来,总会发现神奇的事情发生~~
观察height数组,就会发现,我可以将height数组按照k值分成很多块,保证每一块中的height数组的值都不小于k,这样就可以保证如果存在长度为k的不可重叠重复子串,那么该子串必然为这些块中某个后缀的前缀。
然后再枚举这些块,检查同一块中是否存在两个后缀,它们的起始位置(sa值)之差足够大,使两次出现互不重叠 (sa[i] - sa[j] >= k;注意如果k是差分数组上的长度,对应的音符段长度为k+1,此时两次出现不能共用音符,位置差必须严格大于k)。依据题目,主题至少要包含5个音符(即差分数组上的k至少为4),否则输出0
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <utility>

#define maxn 21000

int r[maxn], sa[maxn];
int t[maxn], t2[maxn], c[maxn];
int n;

// Builds the suffix array of r[0..n-1] by prefix doubling.
//   r  - input symbols; every value must lie in [0, m). The caller stores a
//        0 sentinel in the last slot.
//   sa - output: sa[i] is the start index of the i-th smallest suffix
//   n  - number of symbols, sentinel included
//   m  - exclusive upper bound on the symbol values
// Uses the global scratch arrays t, t2 and c.
void da(int *r, int *sa, int n, int m)
{
    int i, *x = t, *y = t2;

    // Counting sort of the length-1 substrings of r.
    for (i = 0; i < m; i++) c[i] = 0;
    for (i = 0; i < n; i++) c[x[i] = r[i]]++;
    for (i = 1; i < m; i++) c[i] += c[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;

    for (int h = 1; h <= n; h <<= 1) {
        int p = 0;
        // Second key: suffixes whose second half is empty come first, the
        // rest follow in the order already stored in sa.
        for (i = n - h; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) if (sa[i] >= h) y[p++] = sa[i] - h;

        // First key: stable counting sort on the current ranks.
        for (i = 0; i < m; i++) c[i] = 0;
        for (i = 0; i < n; i++) c[x[y[i]]]++;
        // The prefix sum starts at 1; starting at 0 would read c[-1].
        for (i = 1; i < m; i++) c[i] += c[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

        // Re-rank: equal (first half, second half) pairs share a rank.
        std::swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++)
            x[sa[i]] = (y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + h] == y[sa[i] + h]) ? p - 1 : p++;
        if (p >= n) break;   // all ranks distinct: done
        m = p;
    }
}

int Rank[maxn], height[maxn];

// Fills Rank[] (inverse of sa) and height[], where height[i] is the longest
// common prefix of the suffixes sa[i-1] and sa[i], for i in 1..n.
// n is the text length WITHOUT the sentinel; r[n] must hold the sentinel.
void calheight(int *r, int *sa, int n)
{
    int lcp = 0;
    for (int i = 1; i <= n; i++) Rank[sa[i]] = i;
    for (int i = 0; i < n; i++) {
        if (lcp) lcp--;   // the LCP drops by at most one from suffix i-1 to suffix i
        const int j = sa[Rank[i] - 1];
        while (r[i + lcp] == r[j + lcp]) lcp++;
        height[Rank[i]] = lcp;
    }
}

// Returns true when some substring of length x of the difference array
// occurs twice such that the two corresponding note runs (x + 1 notes each)
// do not share a note.
bool Judge(int x)
{
    int Max = sa[1], Min = sa[1];
    for (int i = 2; i <= n; i++) {
        if (height[i] < x) {
            Max = Min = sa[i];   // a new block of suffixes starts here
        } else {
            Max = std::max(Max, sa[i]);
            Min = std::min(Min, sa[i]);
            // x differences cover x + 1 notes, so the start positions must
            // differ by MORE than x; ">=" would let the themes share a note.
            if (Max - Min > x) return 1;
        }
    }
    return 0;
}

// Binary-searches the largest feasible difference-length and prints the
// answer in notes (difference-length + 1), or 0 when it is below 5 notes.
void slove()
{
    int ans = 0;
    int lo = 4, hi = n / 2 + 1;
    while (lo <= hi) {
        const int mid = (lo + hi) >> 1;
        if (Judge(mid)) ans = mid, lo = mid + 1;
        else hi = mid - 1;
    }
    if (ans < 4) puts("0");
    else printf("%d\n", ans + 1);
}

int main()
{
    // Stop on the terminating 0 and also on EOF / malformed input.
    while (scanf("%d", &n) == 1 && n) {
        n--;   // from here on n is the number of differences
        for (int i = 0; i <= n; i++) scanf("%d", &r[i]);
        // Two disjoint 5-note themes need at least 10 notes, i.e. 9 differences.
        if (n < 9) { puts("0"); continue; }
        // Work on differences of neighbouring notes so that transposed themes
        // compare equal; +90 keeps every symbol positive.
        for (int i = 0; i < n; i++) r[i] = r[i] - r[i + 1] + 90;
        r[n] = 0;
        da(r, sa, n + 1, 256);
        calheight(r, sa, n);
        slove();
    }
    return 0;
}
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <map>
#include <utility>

#define maxn 1000100

int r[maxn], sa[maxn];
int t[maxn], t2[maxn], c[maxn];
int n, k;

// Builds the suffix array of r[0..n-1] by prefix doubling.
//   r  - input symbols; every value must lie in [0, m). The caller stores a
//        0 sentinel in the last slot.
//   sa - output: sa[i] is the start index of the i-th smallest suffix
//   n  - number of symbols, sentinel included
//   m  - exclusive upper bound on the symbol values
// Uses the global scratch arrays t, t2 and c.
void da(int *r, int *sa, int n, int m)
{
    int i, *x = t, *y = t2;

    // Counting sort of the length-1 substrings of r.
    for (i = 0; i < m; i++) c[i] = 0;
    for (i = 0; i < n; i++) c[x[i] = r[i]]++;
    for (i = 1; i < m; i++) c[i] += c[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;

    for (int h = 1; h <= n; h <<= 1) {
        int p = 0;
        // Second key: suffixes whose second half is empty come first.
        for (i = n - h; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) if (sa[i] >= h) y[p++] = sa[i] - h;

        // First key: stable counting sort on the current ranks.
        for (i = 0; i < m; i++) c[i] = 0;
        for (i = 0; i < n; i++) c[x[y[i]]]++;
        // The prefix sum starts at 1; starting at 0 would read c[-1].
        for (i = 1; i < m; i++) c[i] += c[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

        // Re-rank: equal (first half, second half) pairs share a rank.
        std::swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++)
            x[sa[i]] = (y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + h] == y[sa[i] + h]) ? p - 1 : p++;
        if (p >= n) break;   // all ranks distinct: done
        m = p;
    }
}

int Rank[maxn], height[maxn];

// Fills Rank[] (inverse of sa) and height[], where height[i] is the longest
// common prefix of the suffixes sa[i-1] and sa[i], for i in 1..n.
// n is the text length WITHOUT the sentinel; r[n] must hold the sentinel.
void calheight(int *r, int *sa, int n)
{
    int lcp = 0;
    for (int i = 1; i <= n; i++) Rank[sa[i]] = i;
    for (int i = 0; i < n; i++) {
        if (lcp) lcp--;   // the LCP drops by at most one from suffix i-1 to suffix i
        const int j = sa[Rank[i] - 1];
        while (r[i + lcp] == r[j + lcp]) lcp++;
        height[Rank[i]] = lcp;
    }
}

// Returns true when some run of consecutive suffixes whose pairwise height
// is >= x contains at least k suffixes, i.e. a substring of length x occurs
// at least k times (occurrences may overlap).
bool Judge(int x)
{
    int cnt = 1;
    for (int i = 2; i <= n; i++) {
        if (height[i] < x) {
            cnt = 1;   // the run is broken; suffix sa[i] starts a new one
        } else {
            cnt++;
            if (cnt >= k) return 1;
        }
    }
    return 0;
}

// Binary-searches the largest feasible length and prints it (-1 if none).
void slove()
{
    int ans = -1;
    int lo = 1, hi = n;
    while (lo <= hi) {
        const int mid = (lo + hi) >> 1;
        if (Judge(mid)) ans = std::max(ans, mid), lo = mid + 1;
        else hi = mid - 1;
    }
    printf("%d\n", ans);
}

int main()
{
    while (scanf("%d %d", &n, &k) == 2) {
        for (int i = 0; i < n; i++) {
            scanf("%d", &r[i]);
            r[i]++;   // shift so that 0 stays free for the sentinel
        }
        r[n] = 0;
        da(r, sa, n + 1, maxn);
        calheight(r, sa, n);
        slove();
    }
    return 0;
}
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <utility>

const int maxn = 51000;

char str[maxn];
int r[maxn], sa[maxn];
int t[maxn], t2[maxn], c[maxn];

// Builds the suffix array of r[0..n-1] by prefix doubling.
//   r  - input symbols; every value must lie in [0, m). The caller stores a
//        0 sentinel in the last slot.
//   sa - output: sa[i] is the start index of the i-th smallest suffix
//   n  - number of symbols, sentinel included
//   m  - exclusive upper bound on the symbol values
// Uses the global scratch arrays t, t2 and c.
void da(int *r, int *sa, int n, int m)
{
    int i, *x = t, *y = t2;

    // Counting sort of the length-1 substrings of r.
    for (i = 0; i < m; i++) c[i] = 0;
    for (i = 0; i < n; i++) c[x[i] = r[i]]++;
    for (i = 1; i < m; i++) c[i] += c[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;

    for (int h = 1; h <= n; h <<= 1) {
        int p = 0;
        // Second key: suffixes whose second half is empty come first.
        for (i = n - h; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) if (sa[i] >= h) y[p++] = sa[i] - h;

        // First key: stable counting sort on the current ranks.
        for (i = 0; i < m; i++) c[i] = 0;
        for (i = 0; i < n; i++) c[x[y[i]]]++;
        // The prefix sum starts at 1; starting at 0 would read c[-1].
        for (i = 1; i < m; i++) c[i] += c[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

        // Re-rank: equal (first half, second half) pairs share a rank.
        std::swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++)
            x[sa[i]] = (y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + h] == y[sa[i] + h]) ? p - 1 : p++;
        if (p >= n) break;   // all ranks distinct: done
        m = p;
    }
}

int Rank[maxn], height[maxn];

// Fills Rank[] (inverse of sa) and height[], where height[i] is the longest
// common prefix of the suffixes sa[i-1] and sa[i], for i in 1..n.
// n is the text length WITHOUT the sentinel; r[n] must hold the sentinel.
void calheight(int *r, int *sa, int n)
{
    int lcp = 0;
    for (int i = 1; i <= n; i++) Rank[sa[i]] = i;
    for (int i = 0; i < n; i++) {
        if (lcp) lcp--;   // the LCP drops by at most one from suffix i-1 to suffix i
        const int j = sa[Rank[i] - 1];
        while (r[i + lcp] == r[j + lcp]) lcp++;
        height[Rank[i]] = lcp;
    }
}

// Reads T strings and prints, for each, its number of distinct substrings.
int main()
{
    int T;
    if (scanf("%d", &T) != 1) return 0;
    while (T--) {
        scanf("%s", str);
        const int len = strlen(str);
        for (int i = 0; i < len; i++) r[i] = str[i] + 1;   // keep 0 free for the sentinel
        r[len] = 0;
        da(r, sa, len + 1, 255);
        calheight(r, sa, len);
        // Suffix sa[i] contributes len - sa[i] prefixes, of which height[i]
        // already appeared as prefixes of the previous suffix in sorted order.
        int ans = len - sa[1];
        for (int i = 2; i <= len; i++) {
            ans += len - sa[i] - height[i];
        }
        printf("%d\n", ans);
    }
    return 0;
}
题意: 给你一个字符串,求出最长回文子串
题解: 额,这个题一直没想好怎么用后缀数组解。
不过新学了一个Manacher算法,该算法可以在O(n)的时间内求出最长回文子串;
后缀数组的解法就以后再补上吧
#include <algorithm>
#include <cstdio>
#include <cstring>

const int maxn = 1111;

char str[maxn];
// The transformed string needs 2 * len + 2 characters plus the terminator.
char s[maxn * 2 + 3];
int p[maxn * 2 + 3];

// Builds s from str: a '$' guard, then '#' between and around every
// character, e.g. "aba" -> "$#a#b#a#".
void get_s()
{
    const int len = strlen(str);
    s[0] = '$';
    s[1] = '#';
    for (int i = 0; i < len; i++) {
        s[i * 2 + 2] = str[i];
        s[i * 2 + 3] = '#';
    }
    s[len * 2 + 2] = '\0';
}

// Manacher's algorithm: p[i] becomes the palindrome radius around s[i]
// (the centre itself counted), for i in 1..strlen(s)-1.
void get_p()
{
    const int len = strlen(s);
    int id = 0, mx = 0;   // mx: one past the rightmost palindrome seen, id: its centre
    for (int i = 1; i < len; i++) {
        if (mx > i)
            p[i] = std::min(p[id * 2 - i], p[id] + id - i);   // reuse the mirrored radius
        else
            p[i] = 1;
        // The explicit left bound keeps the scan inside s even when the
        // input itself contains the guard character '$'.
        while (i - p[i] > 0 && s[i + p[i]] == s[i - p[i]]) p[i]++;
        if (mx < p[i] + i) {
            mx = p[i] + i;
            id = i;
        }
    }
}

// Reads words until EOF and prints the first longest palindromic substring
// of each. NOTE(review): '#' characters in the input are not printed because
// they are indistinguishable from the inserted separators.
int main()
{
    // The field width keeps scanf from writing past str[maxn].
    while (scanf("%1110s", str) == 1) {
        get_s();
        get_p();
        int id = 0;
        const int len = strlen(s);
        for (int i = 1; i < len; i++) {
            if (p[i] > p[id]) id = i;
        }
        for (int i = id - p[id] + 1; i <= id + p[id] - 1; i++) {
            if (s[i] != '#') printf("%c", s[i]);
        }
        puts("");
    }
    return 0;
}
题意: 给你一个字符串,并且我们知道该字符串是由某个字符串S重复R次得到,需要你求出R的最大值
题解: 学过KMP的童鞋一定做过这道题,这是KMP的一道很经典的入门题。思路也是一样。
例如,如果母串str是由长度为k的字符串S重复4次得到的,那么母串(即suffix(0))和后缀suffix(k)的最长公共前缀必然为len-k。
因为如果母串和下面的后缀suffix(k)的最长公共前缀为len-k,即S1[1]=S2[1]&&S1[2]=S2[2]&&S1[3]=S2[3]
又因为本来S1[2] = S2[1] ,S1[3] = S2[2], S1[4] = S2[3]
那么就可以推出=》 S1[1] = S1[2] = S1[3] = S1[4] 了
所以这个问题就转化为了先枚举字符串S的长度K(K必须整除len),然后再看母串也就是后缀suffix(0)和后缀suffix(k)的最长公共前缀是否为len-k了。
#include <stdio.h>
#include <string.h>

#define maxn 1000001
#define INF 999999999

int r[maxn * 3], sa[maxn * 3];
char str[maxn * 3];

#define F(x) ((x) / 3 + ((x) % 3 == 1 ? 0 : tb))
#define G(x) ((x) < tb ? (x) * 3 + 1 : ((x) - tb) * 3 + 2)

int wa[maxn], wb[maxn], wv[maxn], ws[maxn];

// True when the three symbols starting at a equal the three starting at b.
int c0(int *r, int a, int b)
{
    return r[a] == r[b] && r[a + 1] == r[b + 1] && r[a + 2] == r[b + 2];
}

// Orders a position that is 0 mod 3 (a) against one that is k mod 3 (b),
// using the ranks of the already sorted non-multiple-of-3 suffixes in wv.
int c12(int k, int *r, int a, int b)
{
    if (k == 2)
        return r[a] < r[b] || (r[a] == r[b] && c12(1, r, a + 1, b + 1));
    else
        return r[a] < r[b] || (r[a] == r[b] && wv[a + 1] < wv[b + 1]);
}

// Stable counting sort: writes the n indices of a into b, ordered by r[a[i]].
// Every key must lie in [0, m).
void Sort(int *r, int *a, int *b, int n, int m)
{
    int i;
    for (i = 0; i < n; i++) wv[i] = r[a[i]];
    for (i = 0; i < m; i++) ws[i] = 0;
    for (i = 0; i < n; i++) ws[wv[i]]++;
    for (i = 1; i < m; i++) ws[i] += ws[i - 1];
    for (i = n - 1; i >= 0; i--) b[--ws[wv[i]]] = a[i];
}

// DC3 suffix-array construction.
//   r  - text to index; values in [0, m); both r and sa need room for 3 * n ints
//   sa - output suffix array
//   n  - total length (sentinel included)
//   m  - exclusive upper bound on the symbol values
void dc3(int *r, int *sa, int n, int m)
{
    int i, j, *rn = r + n, *san = sa + n, ta = 0, tb = (n + 1) / 3, tbc = 0, p;
    r[n] = r[n + 1] = 0;
    for (i = 0; i < n; i++) if (i % 3 != 0) wa[tbc++] = i;
    Sort(r + 2, wa, wb, tbc, m);
    Sort(r + 1, wb, wa, tbc, m);
    Sort(r, wa, wb, tbc, m);
    for (p = 1, rn[F(wb[0])] = 0, i = 1; i < tbc; i++)
        rn[F(wb[i])] = c0(r, wb[i - 1], wb[i]) ? p - 1 : p++;
    if (p < tbc) dc3(rn, san, tbc, p);
    else for (i = 0; i < tbc; i++) san[rn[i]] = i;
    for (i = 0; i < tbc; i++) if (san[i] < tb) wb[ta++] = san[i] * 3;
    if (n % 3 == 1) wb[ta++] = n - 1;
    Sort(r, wb, wa, ta, m);
    for (i = 0; i < tbc; i++) wv[wb[i] = G(san[i])] = i;
    for (i = 0, j = 0, p = 0; i < ta && j < tbc; p++)
        sa[p] = c12(wb[j] % 3, r, wa[i], wb[j]) ? wa[i++] : wb[j++];
    for (; i < ta; p++) sa[p] = wa[i++];
    for (; j < tbc; p++) sa[p] = wb[j++];
}

int Rank[maxn], height[maxn];

// Computes the height array: height[i] is the longest common prefix of the
// suffixes sa[i-1] and sa[i], for i in 1..n. Also fills Rank[] for every
// text position 0..n-1 (Rank[n], the sentinel position, is NOT written).
void calheight(int *r, int *sa, int n)
{
    int lcp = 0;
    for (int i = 1; i <= n; i++) Rank[sa[i]] = i;
    for (int i = 0; i < n; i++) {
        if (lcp) lcp--;   // the LCP drops by at most one from suffix i-1 to suffix i
        const int j = sa[Rank[i] - 1];
        while (r[i + lcp] == r[j + lcp]) lcp++;
        height[Rank[i]] = lcp;
    }
}

int f[maxn];

// After the call f[rank] is the longest common prefix between suffix 0 and
// the suffix with that rank, for every rank in 1..n (INF at Rank[0] itself).
void fun(int n)
{
    const int j = Rank[0];
    int minn = INF;
    // Walk towards smaller ranks: running minimum of the heights in between.
    for (int i = j; i >= 1; i--) {
        f[i] = minn;
        if (height[i] < minn) minn = height[i];
    }
    // Walk towards larger ranks.
    minn = INF;
    for (int i = j + 1; i <= n; i++) {
        if (height[i] < minn) minn = height[i];
        f[i] = minn;
    }
}

// Reads strings until a lone "." and prints, for each, the largest R such
// that the string is some block repeated R times.
int main()
{
    while (scanf("%s", str) == 1) {
        if (!strcmp(str, ".")) break;
        const int n = strlen(str);
        // Map through unsigned char so any byte gives a key in 1..255;
        // "str[i] - 'a' + 1" goes negative for anything below 'a'.
        for (int i = 0; i < n; i++) r[i] = (unsigned char)str[i];
        r[n] = 0;
        dc3(r, sa, n + 1, 256);   // note: the +1 (sentinel) is essential
        calheight(r, sa, n);
        fun(n);
        // A period k < n works when k divides n and suffix k matches the
        // whole remaining text. k == n always works and is the default;
        // probing it would read Rank[n], which calheight never writes.
        int repeats = 1;
        for (int k = 1; k < n; k++) {
            if (n % k == 0 && f[Rank[k]] == n - k) {
                repeats = n / k;
                break;
            }
        }
        printf("%d\n", repeats);
    }
    return 0;
}
那么要求A、B字符串的最长公共子串,就是比较A的所有后缀和B的所有后缀最长公共前缀
枚举时间复杂度太高。利用后缀数组,我们可以先将两个字符串拼接起来,这样就可以很明显的发现height数组的最大值就是所求
当然同时要满足,构成这个height[i]的前后两个后缀分别是属于A或B
1.HDU 1403 Longest Common Substring
http://acm.hdu.edu.cn/showproblem.php?pid=1403
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <utility>

#define maxn 210000

char str[maxn];
int r[maxn], sa[maxn];
int t[maxn], t2[maxn], c[maxn];

// Builds the suffix array of r[0..n-1] by prefix doubling.
//   r  - input symbols; every value must lie in [0, m). The caller stores a
//        0 sentinel in the last slot.
//   sa - output: sa[i] is the start index of the i-th smallest suffix
//   n  - number of symbols, sentinel included
//   m  - exclusive upper bound on the symbol values
// Uses the global scratch arrays t, t2 and c.
void da(int *r, int *sa, int n, int m)
{
    int i, *x = t, *y = t2;

    // Counting sort of the length-1 substrings of r.
    for (i = 0; i < m; i++) c[i] = 0;
    for (i = 0; i < n; i++) c[x[i] = r[i]]++;
    for (i = 1; i < m; i++) c[i] += c[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;

    for (int h = 1; h <= n; h <<= 1) {
        int p = 0;
        // Second key: suffixes whose second half is empty come first.
        for (i = n - h; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) if (sa[i] >= h) y[p++] = sa[i] - h;

        // First key: stable counting sort on the current ranks.
        for (i = 0; i < m; i++) c[i] = 0;
        for (i = 0; i < n; i++) c[x[y[i]]]++;
        // The prefix sum starts at 1; starting at 0 would read c[-1].
        for (i = 1; i < m; i++) c[i] += c[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

        // Re-rank: equal (first half, second half) pairs share a rank.
        std::swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++)
            x[sa[i]] = (y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + h] == y[sa[i] + h]) ? p - 1 : p++;
        if (p >= n) break;   // all ranks distinct: done
        m = p;
    }
}

int Rank[maxn], height[maxn];

// Fills Rank[] (inverse of sa) and height[], where height[i] is the longest
// common prefix of the suffixes sa[i-1] and sa[i], for i in 1..n.
// n is the text length WITHOUT the sentinel; r[n] must hold the sentinel.
void calheight(int *r, int *sa, int n)
{
    int lcp = 0;
    for (int i = 1; i <= n; i++) Rank[sa[i]] = i;
    for (int i = 0; i < n; i++) {
        if (lcp) lcp--;   // the LCP drops by at most one from suffix i-1 to suffix i
        const int j = sa[Rank[i] - 1];
        while (r[i + lcp] == r[j + lcp]) lcp++;
        height[Rank[i]] = lcp;
    }
}

// Reads pairs of words until EOF and prints the length of the longest
// common substring of each pair.
int main()
{
    while (scanf("%s", str) == 1) {
        const int len = strlen(str);
        str[len] = '!';   // separator between the two words
        scanf("%s", str + len + 1);
        const int n = strlen(str);
        for (int i = 0; i < n; i++) r[i] = (int)str[i];
        r[n] = 0;
        da(r, sa, n + 1, 256);   // note: the +1 (sentinel) is required
        calheight(r, sa, n);
        int ans = 0;
        // note: the bound is <= n (the original author remarks that the HDU
        // data is too weak to catch a wrong bound here)
        for (int i = 2; i <= n; i++) {
            // Only neighbours lying on opposite sides of the separator count.
            const bool split = (sa[i] < len && sa[i - 1] > len) || (sa[i] > len && sa[i - 1] < len);
            if (height[i] > ans && split) ans = height[i];
        }
        printf("%d\n", ans);
    }
    return 0;
}
2.POJ 2774 Long Long Message
http://poj.org/problem?id=2774
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <utility>

#define maxn 200010

char str[maxn];
int r[maxn], sa[maxn];
int t[maxn], t2[maxn], c[maxn];

// Builds the suffix array of r[0..n-1] by prefix doubling.
//   r  - input symbols; every value must lie in [0, m). The caller stores a
//        0 sentinel in the last slot.
//   sa - output: sa[i] is the start index of the i-th smallest suffix
//   n  - number of symbols, sentinel included
//   m  - exclusive upper bound on the symbol values
// Uses the global scratch arrays t, t2 and c.
void da(int *r, int *sa, int n, int m)
{
    int i, *x = t, *y = t2;

    // Counting sort of the length-1 substrings of r.
    for (i = 0; i < m; i++) c[i] = 0;
    for (i = 0; i < n; i++) c[x[i] = r[i]]++;
    for (i = 1; i < m; i++) c[i] += c[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;

    for (int h = 1; h <= n; h <<= 1) {
        int p = 0;
        // Second key: suffixes whose second half is empty come first.
        for (i = n - h; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) if (sa[i] >= h) y[p++] = sa[i] - h;

        // First key: stable counting sort on the current ranks.
        for (i = 0; i < m; i++) c[i] = 0;
        for (i = 0; i < n; i++) c[x[y[i]]]++;
        // The prefix sum starts at 1; starting at 0 would read c[-1].
        for (i = 1; i < m; i++) c[i] += c[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

        // Re-rank: equal (first half, second half) pairs share a rank.
        std::swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++)
            x[sa[i]] = (y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + h] == y[sa[i] + h]) ? p - 1 : p++;
        if (p >= n) break;   // all ranks distinct: done
        m = p;
    }
}

int Rank[maxn], height[maxn];

// Fills Rank[] (inverse of sa) and height[], where height[i] is the longest
// common prefix of the suffixes sa[i-1] and sa[i], for i in 1..n.
// n is the text length WITHOUT the sentinel; r[n] must hold the sentinel.
void calheight(int *r, int *sa, int n)
{
    int lcp = 0;
    for (int i = 1; i <= n; i++) Rank[sa[i]] = i;
    for (int i = 0; i < n; i++) {
        if (lcp) lcp--;   // the LCP drops by at most one from suffix i-1 to suffix i
        const int j = sa[Rank[i] - 1];
        while (r[i + lcp] == r[j + lcp]) lcp++;
        height[Rank[i]] = lcp;
    }
}

// Reads pairs of lowercase words until EOF and prints the length of the
// longest common substring of each pair.
int main()
{
    while (scanf("%s", str) == 1) {
        // Letters map to 2..27, the separator is 1, the sentinel is 0.
        const int len1 = strlen(str);
        for (int i = 0; i < len1; i++) r[i] = str[i] - 'a' + 2;
        r[len1] = 1;
        scanf("%s", str);   // the buffer is reused for the second word
        const int len2 = strlen(str);
        for (int i = len1 + 1; i < len1 + len2 + 1; i++) r[i] = str[i - (len1 + 1)] - 'a' + 2;
        const int n = len1 + len2 + 1;
        r[n] = 0;
        da(r, sa, n + 1, 30);
        calheight(r, sa, n);
        int ans = 0;
        for (int i = 2; i <= n; i++) {
            // Only neighbours lying on opposite sides of the separator count.
            const bool split = (sa[i - 1] < len1 && sa[i] > len1) || (sa[i - 1] > len1 && sa[i] < len1);
            if (ans < height[i] && split) ans = height[i];
        }
        printf("%d\n", ans);
    }
    return 0;
}
3.URAL 1517 Freedom of Choice
http://acm.timus.ru/problem.aspx?space=1&num=1517
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <utility>

#define maxn 210000

char str[maxn];
int r[maxn], sa[maxn];
int t[maxn], t2[maxn], c[maxn];

// Builds the suffix array of r[0..n-1] by prefix doubling.
//   r  - input symbols; every value must lie in [0, m). The caller stores a
//        0 sentinel in the last slot.
//   sa - output: sa[i] is the start index of the i-th smallest suffix
//   n  - number of symbols, sentinel included
//   m  - exclusive upper bound on the symbol values
// Uses the global scratch arrays t, t2 and c.
void da(int *r, int *sa, int n, int m)
{
    int i, *x = t, *y = t2;

    // Counting sort of the length-1 substrings of r.
    for (i = 0; i < m; i++) c[i] = 0;
    for (i = 0; i < n; i++) c[x[i] = r[i]]++;
    for (i = 1; i < m; i++) c[i] += c[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;

    for (int h = 1; h <= n; h <<= 1) {
        int p = 0;
        // Second key: suffixes whose second half is empty come first.
        for (i = n - h; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) if (sa[i] >= h) y[p++] = sa[i] - h;

        // First key: stable counting sort on the current ranks.
        for (i = 0; i < m; i++) c[i] = 0;
        for (i = 0; i < n; i++) c[x[y[i]]]++;
        // The prefix sum starts at 1; starting at 0 would read c[-1].
        for (i = 1; i < m; i++) c[i] += c[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

        // Re-rank: equal (first half, second half) pairs share a rank.
        std::swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++)
            x[sa[i]] = (y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + h] == y[sa[i] + h]) ? p - 1 : p++;
        if (p >= n) break;   // all ranks distinct: done
        m = p;
    }
}

int Rank[maxn], height[maxn];

// Fills Rank[] (inverse of sa) and height[], where height[i] is the longest
// common prefix of the suffixes sa[i-1] and sa[i], for i in 1..n.
// n is the text length WITHOUT the sentinel; r[n] must hold the sentinel.
void calheight(int *r, int *sa, int n)
{
    int lcp = 0;
    for (int i = 1; i <= n; i++) Rank[sa[i]] = i;
    for (int i = 0; i < n; i++) {
        if (lcp) lcp--;   // the LCP drops by at most one from suffix i-1 to suffix i
        const int j = sa[Rank[i] - 1];
        while (r[i + lcp] == r[j + lcp]) lcp++;
        height[Rank[i]] = lcp;
    }
}

// Reads n and two words, then prints one longest common substring of them.
int main()
{
    int n;
    if (scanf("%d", &n) != 1) return 0;

    scanf("%s", str);
    // Assumes the first word is exactly n characters long, so the separator
    // lands right behind it. TODO confirm against the input format.
    str[n] = '@';
    scanf("%s", str + n + 1);
    const int len = strlen(str);
    for (int i = 0; i < len; i++) r[i] = int(str[i]);
    r[len] = 0;
    da(r, sa, len + 1, 256);
    calheight(r, sa, len);

    int ans = 0, flag = 0;   // best length and where that substring starts
    for (int i = 2; i <= len; i++) {
        // Only neighbours lying on opposite sides of the separator count.
        const bool split = (sa[i] < n && sa[i - 1] > n) || (sa[i] > n && sa[i - 1] < n);
        if (height[i] > ans && split) {
            ans = height[i];
            flag = std::min(sa[i], sa[i - 1]);
        }
    }
    for (int i = flag; i < flag + ans; i++) printf("%c", str[i]);
    printf("\n");
    return 0;
}
题目:POJ 3415
http://poj.org/problem?id=3415
题意: 给你两个字符串A、B,求长度不小于K的公共子串的个数
思路: 思路想一想就会发现很简单,不就是把A和B所有的后缀求一下最长公共前缀么,然后再把最长公共前缀的大于等于K的个数全部加起来就行了。这里有个问题,这样弄得话就是O(n^2) 了,不过我们可以利用单调栈处理一下就OK了呀。
单调栈如果不太熟悉的话就去学习一下吧。
这里只简单举一下例子,介绍一下这道题用单调栈的思路:如果height数组是1 3 7 4 5 2 6,其中1 3 7 5 2属于A,4 6 属于B。
A: height[i] = 1时 栈为[1]
A: height[i] = 3时 栈为[1、3]
A: height[i] = 7时 栈为[1、3、7]
B: height[i] = 4时 栈为[1、3、4*2]
A: height[i] = 5时 栈为[1、3、4*2、5]
A: height[i] = 2时 栈为[1、2*5]
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <utility>

#define maxn 210000
typedef long long int64;

int k;
char str[maxn];
int r[maxn], sa[maxn], stk[maxn], num[maxn];
int t[maxn], t2[maxn], c[maxn];

// Builds the suffix array of r[0..n-1] by prefix doubling.
//   r  - input symbols; every value must lie in [0, m). The caller stores a
//        0 sentinel in the last slot.
//   sa - output: sa[i] is the start index of the i-th smallest suffix
//   n  - number of symbols, sentinel included
//   m  - exclusive upper bound on the symbol values
// Uses the global scratch arrays t, t2 and c.
void da(int *r, int *sa, int n, int m)
{
    int i, *x = t, *y = t2;

    // Counting sort of the length-1 substrings of r.
    for (i = 0; i < m; i++) c[i] = 0;
    for (i = 0; i < n; i++) c[x[i] = r[i]]++;
    for (i = 1; i < m; i++) c[i] += c[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;

    for (int h = 1; h <= n; h <<= 1) {
        int p = 0;
        // Second key: suffixes whose second half is empty come first.
        for (i = n - h; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) if (sa[i] >= h) y[p++] = sa[i] - h;

        // First key: stable counting sort on the current ranks.
        for (i = 0; i < m; i++) c[i] = 0;
        for (i = 0; i < n; i++) c[x[y[i]]]++;
        // The prefix sum starts at 1; starting at 0 would read c[-1].
        for (i = 1; i < m; i++) c[i] += c[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

        // Re-rank: equal (first half, second half) pairs share a rank.
        std::swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++)
            x[sa[i]] = (y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + h] == y[sa[i] + h]) ? p - 1 : p++;
        if (p >= n) break;   // all ranks distinct: done
        m = p;
    }
}

int Rank[maxn], height[maxn];

// Fills Rank[] (inverse of sa) and height[], where height[i] is the longest
// common prefix of the suffixes sa[i-1] and sa[i], for i in 1..n.
// n is the text length WITHOUT the sentinel; r[n] must hold the sentinel.
void calheight(int *r, int *sa, int n)
{
    int lcp = 0;
    for (int i = 1; i <= n; i++) Rank[sa[i]] = i;
    for (int i = 0; i < n; i++) {
        if (lcp) lcp--;   // the LCP drops by at most one from suffix i-1 to suffix i
        const int j = sa[Rank[i] - 1];
        while (r[i + lcp] == r[j + lcp]) lcp++;
        height[Rank[i]] = lcp;
    }
}

// One monotonic-stack sweep over the height array. For every suffix sa[i]
// on the "query" side it adds sum(lcp - k + 1) over all earlier suffixes of
// the "source" side in the same run of heights >= k.
//   prevInFirst == true : sources lie before the separator, queries after it
//   prevInFirst == false: the roles are swapped
static int64 count_pairs(int n, int len, bool prevInFirst)
{
    int64 total = 0, sum = 0;
    int top = 0;
    for (int i = 1; i <= n; i++) {
        if (height[i] < k) {   // the run is broken: forget everything
            top = 0;
            sum = 0;
            continue;
        }
        int64 cnt = 0;
        const bool isSource = prevInFirst ? (sa[i - 1] < len) : (sa[i - 1] > len);
        if (isSource) {
            cnt = 1;
            sum += height[i] - k + 1;
        }
        // Entries at least as tall as height[i] are clipped down to it and
        // merged into the entry pushed below.
        while (top > 0 && stk[top] >= height[i]) {
            // Widen before multiplying: count * difference can exceed 2^31.
            sum -= (int64)num[top] * (stk[top] - height[i]);
            cnt += num[top];
            top--;
        }
        top++;
        num[top] = (int)cnt;
        stk[top] = height[i];
        const bool isQuery = prevInFirst ? (sa[i] > len) : (sa[i] < len);
        if (isQuery) total += sum;
    }
    return total;
}

// Prints the number of common substrings of length >= k between the two
// words; n is the length of the joined text, len the length of the first word.
void slove(int n, int len)
{
    const int64 ans = count_pairs(n, len, true) + count_pairs(n, len, false);
    // NOTE(review): %I64d is the MSVCRT spelling kept from the original;
    // switch to %lld on toolchains with a C99-conforming printf.
    printf("%I64d\n", ans);
}

int main()
{
    // Stop on the terminating 0 and also on EOF / malformed input.
    while (scanf("%d", &k) == 1 && k) {
        scanf("%s", str);
        const int len = strlen(str);
        str[len] = '!';   // separator between the two words
        scanf("%s", str + len + 1);
        const int n = strlen(str);
        for (int i = 0; i < n; i++) r[i] = (int)str[i];
        r[n] = 0;
        da(r, sa, n + 1, 256);   // note: the +1 (sentinel) is required
        calheight(r, sa, n);
        slove(n, len);
    }
    return 0;
}
题目:POJ 3294
http://poj.org/problem?id=3294
题意: 给你N个字符串,求出现在超过一半(多于N/2个)字符串中的最长子串;如果有多个,则按字典序全部输出,如果不存在则输出“?”
思路: 对于多个字符串的题目,一般常用的方法都是连接成一个串,二分答案,再结合height数组求解。
对于这个题目,思路其实和之前POJ1743类似,也是二分答案,再将height数组分块,然后看每一块中的后缀是否来自超过N/2个不同的母串
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>

const int maxn = 101000;

#define F(x) ((x) / 3 + ((x) % 3 == 1 ? 0 : tb))
#define G(x) ((x) < tb ? (x) * 3 + 1 : ((x) - tb) * 3 + 2)

char str[maxn];
// owner[p] is the 1-based number of the input word that text position p
// belongs to, or -1 for a separator. (Named "owner" because a global called
// "index" collides with the legacy index() that <string.h> declares on
// some platforms.)
int n, num, owner[maxn], vis[maxn];
int wa[maxn], wb[maxn], wv[maxn], ws[maxn];

// True when the three symbols starting at a equal the three starting at b.
int c0(int *r, int a, int b)
{
    return r[a] == r[b] && r[a + 1] == r[b + 1] && r[a + 2] == r[b + 2];
}

// Orders a position that is 0 mod 3 (a) against one that is k mod 3 (b),
// using the ranks of the already sorted non-multiple-of-3 suffixes in wv.
int c12(int k, int *r, int a, int b)
{
    if (k == 2)
        return r[a] < r[b] || (r[a] == r[b] && c12(1, r, a + 1, b + 1));
    else
        return r[a] < r[b] || (r[a] == r[b] && wv[a + 1] < wv[b + 1]);
}

// Stable counting sort: writes the n indices of a into b, ordered by r[a[i]].
// Every key must lie in [0, m).
void sort(int *r, int *a, int *b, int n, int m)
{
    int i;
    for (i = 0; i < n; i++) wv[i] = r[a[i]];
    for (i = 0; i < m; i++) ws[i] = 0;
    for (i = 0; i < n; i++) ws[wv[i]]++;
    for (i = 1; i < m; i++) ws[i] += ws[i - 1];
    for (i = n - 1; i >= 0; i--) b[--ws[wv[i]]] = a[i];
}

// DC3 suffix-array construction; r and sa need room for 3 * n ints, every
// symbol must lie in [0, m), n includes the sentinel.
void DC3(int *r, int *sa, int n, int m)
{
    int i, j, *rn = r + n, *san = sa + n, ta = 0, tb = (n + 1) / 3, tbc = 0, p;
    r[n] = r[n + 1] = 0;
    for (i = 0; i < n; i++) if (i % 3 != 0) wa[tbc++] = i;
    sort(r + 2, wa, wb, tbc, m);
    sort(r + 1, wb, wa, tbc, m);
    sort(r, wa, wb, tbc, m);
    for (p = 1, rn[F(wb[0])] = 0, i = 1; i < tbc; i++)
        rn[F(wb[i])] = c0(r, wb[i - 1], wb[i]) ? p - 1 : p++;
    if (p < tbc) DC3(rn, san, tbc, p);
    else for (i = 0; i < tbc; i++) san[rn[i]] = i;
    for (i = 0; i < tbc; i++) if (san[i] < tb) wb[ta++] = san[i] * 3;
    if (n % 3 == 1) wb[ta++] = n - 1;
    sort(r, wb, wa, ta, m);
    for (i = 0; i < tbc; i++) wv[wb[i] = G(san[i])] = i;
    for (i = 0, j = 0, p = 0; i < ta && j < tbc; p++)
        sa[p] = c12(wb[j] % 3, r, wa[i], wb[j]) ? wa[i++] : wb[j++];
    for (; i < ta; p++) sa[p] = wa[i++];
    for (; j < tbc; p++) sa[p] = wb[j++];
}

int Rank[maxn], height[maxn], sa[3 * maxn], r[3 * maxn];

// Fills Rank[] (inverse of sa) and height[], where height[i] is the longest
// common prefix of the suffixes sa[i-1] and sa[i], for i in 1..n.
void calheight(int *r, int *sa, int n)
{
    int lcp = 0;
    for (int i = 1; i <= n; i++) Rank[sa[i]] = i;
    for (int i = 0; i < n; i++) {
        if (lcp) lcp--;   // the LCP drops by at most one from suffix i-1 to suffix i
        const int j = sa[Rank[i] - 1];
        while (r[i + lcp] == r[j + lcp]) lcp++;
        height[Rank[i]] = lcp;
    }
}

// Reads the n words, joins them with pairwise distinct separators into r,
// builds sa/height and returns the length of the longest word.
int Input()
{
    // The separator is an int: a char counter would wrap to negative values
    // after 127 on signed-char platforms and index ws[] out of bounds.
    int sep = 'z' + 1;
    int Max = 0;
    num = 0;
    for (int i = 1; i <= n; i++) {
        scanf("%s", str);
        const int len = strlen(str);
        Max = std::max(Max, len);
        for (int j = 0; j < len; j++) {
            owner[num] = i;
            r[num++] = str[j];
        }
        owner[num] = -1;
        r[num++] = sep++;
    }
    r[num] = 0;
    // Every symbol is below sep; never go below the original bound of 256.
    const int alphabet = sep > 256 ? sep : 256;
    DC3(r, sa, num + 1, alphabet);
    calheight(r, sa, num);
    return Max;
}

int num_cnt;
int ans[maxn];

// Checks whether some substring of length x occurs in more than n / 2 of
// the words. On success ans[1..num_cnt] holds one start position per such
// substring, in suffix-array order; on failure ans/num_cnt are untouched.
int slove(int x)
{
    int flag = false;
    for (int i = 1; i <= num; i++) {
        // [L, R) is the next maximal run of heights >= x; the suffixes
        // involved have ranks L-1 .. R-1.
        int L = i;
        while (L <= num && height[L] < x) L++;
        if (L > num) break;
        int R = L;
        while (R <= num && height[R] >= x) R++;
        if (R - L + 2 < n / 2) {   // too few suffixes to reach a majority
            i = R;
            continue;
        }
        int cnt = 0;
        // Only entries 1..n are ever used, so clear just those.
        memset(vis, 0, sizeof(vis[0]) * (n + 1));
        for (int j = L - 1; j < R; j++) {
            const int id = owner[sa[j]];
            if (id != -1 && !vis[id]) {
                vis[id] = 1;
                cnt++;
            }
        }
        if (cnt > n / 2) {
            if (flag) ans[++num_cnt] = sa[L - 1];
            else {
                num_cnt = 1;
                ans[num_cnt] = sa[L - 1];
                flag = 1;
            }
        }
        i = R;
    }
    return flag;
}

int main()
{
    int Case = 1;
    // Stop on the terminating 0 and also on EOF / malformed input.
    while (scanf("%d", &n) == 1 && n) {
        const int Max = Input();
        int L = 0, R = Max, mid, res = 0;
        if (slove(Max)) res = Max;
        else {
            while (L < R) {
                mid = (L + R) / 2;
                if (slove(mid)) {
                    res = mid;
                    L = mid + 1;
                }
                else R = mid;
            }
        }
        if (Case++ != 1) puts("");   // blank line between test cases
        if (!res) puts("?");
        else {
            for (int i = 1; i <= num_cnt; i++) {
                const int k = ans[i];   // start position of this substring
                for (int j = 0; j < res; j++) printf("%c", r[k + j]);
                puts("");
            }
        }
    }
    return 0;
}
题目: SPOJ 220
http://www.spoj.com/problems/PHRASES/
题意: 给你N个串,求出在每个串中都至少出现两次、且这两次出现互不重叠的最长子串的长度
思路: 做法和上题类似,也是先把这些串连接起来,然后二分答案,用这个答案将height数组分块,然后判断每一块中的后缀数组是否出现至少两次,然后再判断这两次是不是能够不重叠,即两个后缀数组起始位置差不小于这个二分出来的答案。
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

const int maxn = 210000;

#define F(x) ((x) / 3 + ((x) % 3 == 1 ? 0 : tb))
#define G(x) ((x) < tb ? (x) * 3 + 1 : ((x) - tb) * 3 + 2)

char str[maxn];
int wa[maxn], wb[maxn], wv[maxn], ws[maxn];

// True when the three symbols starting at a equal the three starting at b.
int c0(int *r, int a, int b)
{
    return r[a] == r[b] && r[a + 1] == r[b + 1] && r[a + 2] == r[b + 2];
}

// Orders a position that is 0 mod 3 (a) against one that is k mod 3 (b),
// using the ranks of the already sorted non-multiple-of-3 suffixes in wv.
int c12(int k, int *r, int a, int b)
{
    if (k == 2)
        return r[a] < r[b] || (r[a] == r[b] && c12(1, r, a + 1, b + 1));
    else
        return r[a] < r[b] || (r[a] == r[b] && wv[a + 1] < wv[b + 1]);
}

// Stable counting sort: writes the n indices of a into b, ordered by r[a[i]].
// Every key must lie in [0, m).
void sort(int *r, int *a, int *b, int n, int m)
{
    int i;
    for (i = 0; i < n; i++) wv[i] = r[a[i]];
    for (i = 0; i < m; i++) ws[i] = 0;
    for (i = 0; i < n; i++) ws[wv[i]]++;
    for (i = 1; i < m; i++) ws[i] += ws[i - 1];
    for (i = n - 1; i >= 0; i--) b[--ws[wv[i]]] = a[i];
}

// DC3 suffix-array construction; r and sa need room for 3 * n ints, every
// symbol must lie in [0, m), n includes the sentinel.
void DC3(int *r, int *sa, int n, int m)
{
    int i, j, *rn = r + n, *san = sa + n, ta = 0, tb = (n + 1) / 3, tbc = 0, p;
    r[n] = r[n + 1] = 0;
    for (i = 0; i < n; i++) if (i % 3 != 0) wa[tbc++] = i;
    sort(r + 2, wa, wb, tbc, m);
    sort(r + 1, wb, wa, tbc, m);
    sort(r, wa, wb, tbc, m);
    for (p = 1, rn[F(wb[0])] = 0, i = 1; i < tbc; i++)
        rn[F(wb[i])] = c0(r, wb[i - 1], wb[i]) ? p - 1 : p++;
    if (p < tbc) DC3(rn, san, tbc, p);
    else for (i = 0; i < tbc; i++) san[rn[i]] = i;
    for (i = 0; i < tbc; i++) if (san[i] < tb) wb[ta++] = san[i] * 3;
    if (n % 3 == 1) wb[ta++] = n - 1;
    sort(r, wb, wa, ta, m);
    for (i = 0; i < tbc; i++) wv[wb[i] = G(san[i])] = i;
    for (i = 0, j = 0, p = 0; i < ta && j < tbc; p++)
        sa[p] = c12(wb[j] % 3, r, wa[i], wb[j]) ? wa[i++] : wb[j++];
    for (; i < ta; p++) sa[p] = wa[i++];
    for (; j < tbc; p++) sa[p] = wb[j++];
}

int Rank[maxn], height[maxn], sa[3 * maxn], r[3 * maxn];

// Fills Rank[] (inverse of sa) and height[], where height[i] is the longest
// common prefix of the suffixes sa[i-1] and sa[i], for i in 1..n.
void calheight(int *r, int *sa, int n)
{
    int lcp = 0;
    for (int i = 1; i <= n; i++) Rank[sa[i]] = i;
    for (int i = 0; i < n; i++) {
        if (lcp) lcp--;   // the LCP drops by at most one from suffix i-1 to suffix i
        const int j = sa[Rank[i] - 1];
        while (r[i + lcp] == r[j + lcp]) lcp++;
        height[Rank[i]] = lcp;
    }
}

// aaa[p] is the 1-based number of the input word that text position p
// belongs to, or -1 for a separator.
int n, num, aaa[maxn];

// Reads the n words, joins them with pairwise distinct separators into r,
// builds sa/height and returns the length of the shortest word.
int Input()
{
    // The separator is an int: a char counter would wrap to negative values
    // after 127 on signed-char platforms and index ws[] out of bounds.
    int sep = 'z' + 1;
    int Min = 11000;
    num = 0;
    for (int i = 1; i <= n; i++) {
        scanf("%s", str);
        const int len = strlen(str);
        Min = std::min(Min, len);
        for (int j = 0; j < len; j++) {
            aaa[num] = i;
            r[num++] = str[j];
        }
        aaa[num] = -1;
        r[num++] = sep++;
    }
    r[num] = 0;
    DC3(r, sa, num + 1, 256);
    calheight(r, sa, num);
    return Min;
}

// Per-word bookkeeping for the current block; sized for at most 14 words.
int vis[15], start[15], finish[15];

// Returns 1 when some substring of length x occurs at least twice without
// overlap in EVERY word, 0 otherwise.
int slove(int x)
{
    for (int i = 1; i <= num; i++) {
        // [L, R) is the next maximal run of heights >= x; the suffixes
        // involved have ranks L-1 .. R-1.
        int L = i;
        while (L <= num && height[L] < x) L++;
        if (L > num) break;
        int R = L;
        while (R <= num && height[R] >= x) R++;

        memset(vis, 0, sizeof(vis));
        memset(start, 0, sizeof(start));
        memset(finish, 0, sizeof(finish));
        for (int j = L - 1; j <= R - 1; j++) {
            const int id = aaa[sa[j]];
            if (id == -1) continue;
            if (vis[id] == 0) {
                // first occurrence seen in this word
                start[id] = finish[id] = sa[j];
            } else {
                // Track the leftmost and rightmost occurrence: comparing
                // only against the first-seen one misses valid pairs.
                start[id] = std::min(start[id], sa[j]);
                finish[id] = std::max(finish[id], sa[j]);
            }
            vis[id]++;
        }

        int cnt = 0;
        for (int j = 1; j <= n; j++)
            if (vis[j] >= 2 && finish[j] - start[j] >= x) cnt++;
        if (cnt == n) return 1;
        i = R;
    }
    return 0;
}

int main()
{
    int T;
    if (scanf("%d", &T) != 1) return 0;
    while (T--) {
        scanf("%d", &n);
        const int Min = Input();
        int L = 0, R = Min, mid, res = 0;
        if (slove(Min)) res = Min;
        else {
            while (L < R) {
                mid = (L + R) / 2;
                if (slove(mid)) {
                    res = mid;
                    L = mid + 1;
                }
                else R = mid;
            }
        }
        printf("%d\n", res);
    }
    return 0;
}