提到后缀数组,离不开三个数组,sa,rank,height,sa[i]存放的是排名第i的是那个后缀,用所在的下表表示。rank[i],表示下表为i的后缀它的排名为多少,和sa互为逆运算,
简单的说,后缀数组是“排第几的是谁?”,名次数组是“你排第几?”。
对于不懂后缀数组的强烈建议看下 罗穗骞《后缀数组——处理字符串的有力工具》国家ACM论文
参考 罗穗骞《后缀数组——处理字符串的有力工具》国家ACM论文
关于此论文可以参考下面的博客 在博客内容的下面有这篇论文
http://blog.csdn.net/self_chou/article/details/7761180
另外下面的题目题解大家自己看下面的博客吧 表示自己看了好久没看懂怎么做的 最后找到了 几个比较好的文章 终于算是搞懂了
关于poj 3693
http://blog.csdn.net/acm_cxlove/article/details/7941205
http://blog.573114.com/Blog/Html/9B41/674751.html
http://www.kaixinwenda.com/article-wxy__-7618220.html 这个有图解 更好理解些
Time Limit: 1000MS | Memory Limit: 65536K | |
Total Submissions: 5429 | Accepted: 1627 |
Description
The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1.
Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.
Input
The input consists of multiple test cases. Each test case contains exactly one line, which
gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.
The last test case is followed by a line containing a '#'.
Output
For each test case, print a line containing the test case number( beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.
Sample Input
ccabababc daabbccaa #
Sample Output
Case 1: ababab Case 2: aa
Source
#include<stdio.h> #include<string.h> #include<iostream> #include<cstdio> #include<vector> #include<cstring> using namespace std; const int nMax =1000012; int num[nMax]; int sa[nMax], rank[nMax], height[nMax]; int wa[nMax], wb[nMax], wv[nMax], wd[nMax]; int mmin(int a,int b) { if(a>b) return b; return a; } int cmp(int *r, int a, int b, int l) { return r[a] == r[b] && r[a+l] == r[b+l]; } void da(int *r, int n, int m){ // 倍增算法 r为待匹配数组 n为总长度 m为字符范围 int i, j, p, *x = wa, *y = wb, *t; for(i = 0; i < m; i ++) wd[i] = 0; for(i = 0; i < n; i ++) wd[x[i]=r[i]] ++; for(i = 1; i < m; i ++) wd[i] += wd[i-1]; for(i = n-1; i >= 0; i --) sa[-- wd[x[i]]] = i; for(j = 1, p = 1; p < n; j *= 2, m = p){ for(p = 0, i = n-j; i < n; i ++) y[p ++] = i; for(i = 0; i < n; i ++) if(sa[i] >= j) y[p ++] = sa[i] - j; for(i = 0; i < n; i ++) wv[i] = x[y[i]]; for(i = 0; i < m; i ++) wd[i] = 0; for(i = 0; i < n; i ++) wd[wv[i]] ++; for(i = 1; i < m; i ++) wd[i] += wd[i-1]; for(i = n-1; i >= 0; i --) sa[-- wd[wv[i]]] = y[i]; for(t = x, x = y, y = t, p = 1, x[sa[0]] = 0, i = 1; i < n; i ++){ x[sa[i]] = cmp(y, sa[i-1], sa[i], j) ? p - 1: p ++; } } } void calHeight(int *r, int n){ // 求height数组。 int i, j, k = 0; for(i = 1; i <= n; i ++) rank[sa[i]] = i; // 1->n for(i = 0; i < n; i++){ for(k ? k -- : 0, j = sa[rank[i]-1]; r[i+k] == r[j+k]; k ++); height[rank[i]] = k; } } int Log[nMax]; int best[20][nMax];//best[i][j] 表示从j开始的长度为2的i次方的一段元素的最小值 void initRMQ(int n) {//初始化RMQ int i,j; for(i = 1; i <= n ; i ++) best[0][i] = height[i]; for(i = 1; i <= Log[n] ; i ++) { int limit = n - (1<<i) + 1; for(j = 1; j <= limit ; j ++) { best[i][j] = mmin(best[i-1][j] , best[i-1][j+(1<<i>>1)]); } } } int lcp(int a,int b) {//询问a,b后缀的最长公共前缀 a = rank[a]; b = rank[b]; if(a > b) swap(a,b); a ++; int t = Log[b - a + 1]; return mmin(best[t][a] , best[t][b - (1<<t) + 1]); } void get_log() { int i; Log[0] = -1; for(i=1;i<=nMax;i++) { // 求log2,这么强大的位运算。。 Log[i]=(i&(i-1))?Log[i-1]:Log[i-1] + 1 ; } } char str[nMax]; int ans[nMax]; int main() { int i,j,n,cas=0;; get_log(); while(scanf("%s",str)!=EOF) { n=strlen(str); if(n==1&&str[0]=='#') break; for(i=0;i<n;i++) { num[i]=str[i]-'a'+1; } num[n]=0;///////// da(num,n+1,30); calHeight(num,n); initRMQ(n); int l,mmax=-1,cnt,pos,t=0; for(l=1;l<n;l++) { for(i=0;i+l<n;i+=l) { int k=lcp(i,i+l); cnt=k/l+1; pos=k%l; pos=l-pos; pos=i-pos; if(pos>=0&&k%l!=0&&lcp(pos,pos+l)>=k) cnt++;//// if(cnt>mmax) { mmax=cnt; t=0; ans[t++]=l; } else if(cnt==mmax) { ans[t++]=l; } } } int flag=0,start; for(i=1;i<=n;i++) { for(j=0;j<t;j++) { l=ans[j]; if(lcp(sa[i],sa[i]+l)>=(mmax-1)*l) { start=sa[i]; l=l*mmax; flag=1; break; } } if(flag) break; } printf("Case %d: ",++cas); for (int i=0;i<l;i++) printf("%c",str[start+ i]); printf("\n"); } return 0; }
注意 千万注意 数组是从0开始的 所以 rank sa 也是从0开始的
D3 做法
#include <stdio.h> #include<string.h> #define maxn 1000001 #define F(x) ((x)/3+((x)%3==1?0:tb)) #define G(x) ((x)<tb?(x)*3+1:((x)-tb)*3+2) int wa[maxn],wb[maxn],wv[maxn],ws[maxn]; int c0(int *r,int a,int b) {return r[a]==r[b]&&r[a+1]==r[b+1]&&r[a+2]==r[b+2];} int c12(int k,int *r,int a,int b) {if(k==2) return r[a]<r[b]||r[a]==r[b]&&c12(1,r,a+1,b+1); else return r[a]<r[b]||r[a]==r[b]&&wv[a+1]<wv[b+1];} void sort(int *r,int *a,int *b,int n,int m) { int i; for(i=0;i<n;i++) wv[i]=r[a[i]]; for(i=0;i<m;i++) ws[i]=0; for(i=0;i<n;i++) ws[wv[i]]++; for(i=1;i<m;i++) ws[i]+=ws[i-1]; for(i=n-1;i>=0;i--) b[--ws[wv[i]]]=a[i]; return; } void dc3(int *r,int *sa,int n,int m) // r为待匹配数组 n为总长度 m为字符范围 { int i,j,*rn=r+n,*san=sa+n,ta=0,tb=(n+1)/3,tbc=0,p; r[n]=r[n+1]=0; for(i=0;i<n;i++) if(i%3!=0) wa[tbc++]=i; sort(r+2,wa,wb,tbc,m); sort(r+1,wb,wa,tbc,m); sort(r,wa,wb,tbc,m); for(p=1,rn[F(wb[0])]=0,i=1;i<tbc;i++) rn[F(wb[i])]=c0(r,wb[i-1],wb[i])?p-1:p++; if(p<tbc) dc3(rn,san,tbc,p); else for(i=0;i<tbc;i++) san[rn[i]]=i; for(i=0;i<tbc;i++) if(san[i]<tb) wb[ta++]=san[i]*3; if(n%3==1) wb[ta++]=n-1; sort(r,wb,wa,ta,m); for(i=0;i<tbc;i++) wv[wb[i]=G(san[i])]=i; for(i=0,j=0,p=0;i<ta && j<tbc;p++) sa[p]=c12(wb[j]%3,r,wa[i],wb[j])?wa[i++]:wb[j++]; for(;i<ta;p++) sa[p]=wa[i++]; for(;j<tbc;p++) sa[p]=wb[j++]; return; } int rank[maxn],height[maxn]; void calheight(int *r,int *sa,int n) // 求height数组。 { int i,j,k=0; for(i=1;i<=n;i++) rank[sa[i]]=i; for(i=0;i<n;height[rank[i++]]=k) for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++); return; } int RMQ[maxn]; int mm[maxn]; int best[20][maxn];//best[i][j] 表示从j开始的长度为2的i次方的一段元素的最小值 void initRMQ(int n) { int i,j,a,b; for(mm[0]=-1,i=1;i<=n;i++) mm[i]=((i&(i-1))==0)?mm[i-1]+1:mm[i-1]; for(i=1;i<=n;i++) best[0][i]=i; for(i=1;i<=mm[n];i++) for(j=1;j<=n+1-(1<<i);j++) { a=best[i-1][j]; b=best[i-1][j+(1<<(i-1))]; if(RMQ[a]<RMQ[b]) best[i][j]=a; else best[i][j]=b; } return; } int askRMQ(int a,int b)//询问a,b后缀的最长公共前缀 { int t; t=mm[b-a+1];b-=(1<<t)-1; a=best[t][a];b=best[t][b]; return RMQ[a]<RMQ[b]?a:b; } int lcp(int a,int b) { int t; a=rank[a];b=rank[b]; if(a>b) {t=a;a=b;b=t;} return(height[askRMQ(a+1,b)]); } char c; int r[maxn*3],sa[maxn*3]; int ans[maxn]; char str[maxn*3]; int main() { int i,j,n,cas=0; while(scanf("%s",str)!=EOF) { n=strlen(str); if(n==1&&str[0]=='#') break; for(i=0;i<n;i++) r[i]=str[i]-'a'+1; r[n]=0; dc3(r,sa,n+1,30);//千万注意是n+1 calheight(r,sa,n); for(i=1;i<=n;i++) RMQ[i]=height[i]; initRMQ(n); /* for(i=0; i<n+1; i++) // rank[i] : suffix(i)排第几 printf("rank[%d] = %d\n",i,rank[i]); printf("\n"); for(i=0; i<n+1; i++) // sa[i] : 排在第i个的是谁 printf("sa[%d] = %d\n",i,sa[i]); */ int l,mmax=-1,cnt,pos,t=0; for(l=1;l<n;l++) { for(i=0;i+l<n;i+=l) { int k=lcp(i,i+l); cnt=k/l+1; pos=k%l; pos=l-pos; pos=i-pos; if(pos>=0&&k%l!=0&&lcp(pos,pos+l)>=k) cnt++;//// if(cnt>mmax) { mmax=cnt; t=0; ans[t++]=l; } else if(cnt==mmax) { ans[t++]=l; } } } int flag=0,start; for(i=1;i<=n;i++) { for(j=0;j<t;j++) { l=ans[j]; if(lcp(sa[i],sa[i]+l)>=(mmax-1)*l) { start=sa[i]; l=l*mmax; flag=1; break; } } if(flag) break; } printf("Case %d: ",++cas); for (int i=0;i<l;i++) printf("%c",str[start+ i]); printf("\n"); } return 0; }