The answer is given in the 146th episode of Star Trek - The Next Generation, titled The Chase. It turns out that the vast majority of the quadrant's life forms ended up with a large fragment of common DNA.
Given the DNA sequences of several life forms represented as strings of letters, you are to find the longest substring that is shared by more than half of them.
Standard input contains several test cases. Each test case begins with 1 ≤ n ≤ 100, the number of life forms. n lines follow; each contains a string of lower case letters representing the DNA sequence of a life form. Each DNA sequence contains at least one and not more than 1000 letters. A line containing 0 follows the last test case.
For each test case, output the longest string or strings shared by more than half of the life forms. If there are many, output all of them in alphabetical order. If there is no solution with at least one letter, output "?". Leave an empty line between test cases.
Sample Input:
3
abcdefg
bcdefgh
cdefghi
3
xxx
yyy
zzz
0
Sample Output:
bcdefg
cdefgh

?
#include<iostream>
#include<queue>
#include<cstring>
#include<cstdio>
#include<cmath>
#include<set>
#include<map>
#include<vector>
#include<stack>
#include<algorithm>
#define INF 0x3f3f3f3f
#define eps 1e-9
#define MAXNODE 105
#define MOD 10000007
#define SIGMA_SIZE 4
typedef long long LL;
using namespace std;

// Longest substring shared by more than half of N strings.
//
// Approach: concatenate all sequences into one integer text, putting a
// distinct separator symbol after each sequence and a single 0 at the very
// end. Build the suffix array and the LCP ("height") array of that text, then
// binary-search the answer length: a length `len` is feasible when some
// maximal run of adjacent suffixes with LCP >= len touches more than N/2
// different sequences.

const int MAXN=110000;  // capacity of the concatenated text (letters + separators + terminator)
const int MAXM=110;     // capacity of the per-sequence mark array

int N;           // number of sequences in the current test case
int flag[MAXM];  // scratch marks for good(): flag[k] != 0 once sequence k has been counted
int idx[MAXN];   // idx[p] = sequence owning text position p, or N for a separator / the terminator
char str[1010];  // input buffer for one sequence

struct SuffixArray{
    int s[MAXN];       // the text as integers
    int sa[MAXN];      // suffix array: sa[i] is the start of the i-th smallest suffix.
                       // The last symbol of s must be 0 and every earlier symbol non-zero.
    int rank[MAXN];    // inverse of sa: rank[i] is the position of suffix i in sa (rank[n-1] == 0)
    int height[MAXN];  // height[i] = longest common prefix of suffixes sa[i-1] and sa[i]
    int c[MAXN];       // counting-sort buckets
    int t[MAXN],t2[MAXN];  // the two alternating key arrays (x / y) used by build_sa
    int n;             // number of symbols currently stored in s

    // Starts a new text. The working arrays need no reset: build_sa and
    // build_height overwrite every entry in [0, n) before it is read.
    void clear(){
        n=0;
    }

    // Builds sa by prefix doubling with counting sort.
    // m must be (largest symbol value + 1); s and n must be set beforehand.
    void build_sa(int m){
        int i,*x=t,*y=t2;
        // Counting sort of all suffixes by their first symbol.
        for(i=0;i<m;i++) c[i]=0;
        for(i=0;i<n;i++) c[x[i]=s[i]]++;
        for(i=1;i<m;i++) c[i]+=c[i-1];
        for(i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
        for(int k=1;k<=n;k<<=1){
            int p=0;
            // Order by the second key (rank of suffix i+k), reusing the current sa.
            // Suffixes too short to have a second key come first.
            for(i=n-k;i<n;i++) y[p++]=i;
            for(i=0;i<n;i++) if(sa[i]>=k) y[p++]=sa[i]-k;
            // Stable counting sort by the first key.
            for(i=0;i<m;i++) c[i]=0;
            for(i=0;i<n;i++) c[x[y[i]]]++;
            for(i=1;i<m;i++) c[i]+=c[i-1];
            for(i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
            // Derive the new ranks; y now holds the previous ranks.
            swap(x,y);
            p=1;
            x[sa[0]]=0;
            // The second comparison is only evaluated when the first keys are
            // equal; this relies on the unique trailing 0 described on `sa`.
            for(i=1;i<n;i++)
                x[sa[i]]=y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+k]==y[sa[i]+k]?p-1:p++;
            if(p>=n) break;  // all ranks distinct: sorting is complete
            m=p;             // next round sorts rank values in [0, p)
        }
    }

    // Fills rank and height from s and sa. Must be called after build_sa.
    void build_height(){
        int k=0;
        for(int i=0;i<n;i++) rank[sa[i]]=i;
        height[0]=0;
        // Visit suffixes in text order so the previous match length (minus one)
        // can be reused as the starting point for the next comparison.
        for(int i=0;i<n-1;i++){
            if(k) k--;
            int j=sa[rank[i]-1];
            while(s[i+k]==s[j+k]) k++;
            height[rank[i]]=k;
        }
    }
}sa;

// Returns true when the suffixes ranked in [L, R) start inside more than N/2
// different sequences. Separator / terminator positions are ignored.
bool good(int L,int R){
    if(R-L<=N/2) return false;  // too few suffixes to cover a majority
    memset(flag,0,sizeof(flag));
    int cnt=0;
    for(int i=L;i<R;i++){
        int owner=idx[sa.sa[i]];
        if(owner!=N&&!flag[owner]){
            flag[owner]=1;
            cnt++;
        }
    }
    return cnt>N/2;
}

// Returns true when some substring of length `len` occurs in more than half of
// the sequences. Scans maximal rank intervals [L, R) whose internal height
// values are all >= len.
bool check(int len){
    int L=0;
    for(int R=1;R<=sa.n;R++)
        if(R==sa.n||sa.height[R]<len){
            if(good(L,R)) return true;
            L=R;
        }
    return false;
}

// Prints, one per line and in suffix-array order, every substring of length
// `len` that occurs in more than half of the sequences.
void print(int len){
    int L=0;
    for(int R=1;R<=sa.n;R++)
        if(R==sa.n||sa.height[R]<len){
            if(good(L,R)){
                // All suffixes of the interval share their first len symbols,
                // so the first one is used as the representative.
                for(int i=sa.sa[L];i<sa.sa[L]+len;i++)
                    putchar(sa.s[i]+'a'-1);
                puts("");
            }
            L=R;
        }
}

// Finds the largest feasible length by binary search on [1, maxlen] and prints
// the matching substrings, or "?" when not even length 1 is feasible.
void solve(int maxlen){
    if(!check(1)){
        printf("?\n");
        return;
    }
    int L=1,R=maxlen;
    while(L<R){
        int mid=L+(R-L+1)/2;  // upper middle so that L=mid always makes progress
        if(check(mid)) L=mid;
        else R=mid-1;
    }
    print(L);
}

// Reads test cases from standard input until a line containing 0 (or until
// the input ends or is malformed) and writes the answers to standard output,
// separated by an empty line.
int main(){
    int cas=0;
    while(scanf("%d",&N)==1&&N){
        // The fixed-size buffers hold at most MAXM-1 sequences.
        if(N<0||N>=MAXM) break;
        if(++cas>1) puts("");
        sa.clear();
        int maxlen=0;
        for(int i=0;i<N;i++){
            // Width-limited read: a sequence has at most 1000 letters.
            if(scanf("%1000s",str)!=1) return 0;
            int len=strlen(str);
            maxlen=max(maxlen,len);
            for(int j=0;j<len;j++){
                idx[sa.n]=i;
                sa.s[sa.n++]=str[j]-'a'+1;  // letters map to 1..26
            }
            // Distinct separator per sequence so no common prefix can span two sequences.
            idx[sa.n]=N;
            sa.s[sa.n++]=i+100;
        }
        // Unique smallest terminator required by the suffix array construction.
        idx[sa.n]=N;
        sa.s[sa.n++]=0;
        if(N==1){
            // A single sequence is trivially shared by more than half of the input.
            printf("%s\n",str);
            continue;
        }
        sa.build_sa(110+N);  // the largest symbol is the last separator, N-1+100
        sa.build_height();
        solve(maxlen);
    }
    return 0;
}