题目链接:http://poj.org/problem?id=3294
题目大意:给定n个字符串,找出至少出现在n/2个字符串中的最长子串。
解题思路:本题可用后缀数组解决。先用倍增算法算出sa数组、rank数组再算出height数组,最后二分枚举长度,看是否符合出现在n/2个字符串中。题目并不难,但比较麻烦,首先要记录最长的字符串,作为二分查找的上届,然后在二分的时候要对height数组进行分组,将大于mid的分一组,看着一组中的子串都在哪些字符串中,用vis标记出现过的字符串,以免重复。还有一个地方要注意的是中间的分隔尽量不要一样,我就用了96个字符进行分隔,这可大幅度减少错误。
测试数据:
3
aaa
nstv
cde
cdefghi
代码:
#include <stdio.h> #include <string.h> #define MIN 110 #define MAX 1100 #define INF 110000 char str[MIN][MAX]; int ans[MAX],tot,minlen,maxlen; int sa[INF],ra[INF],h[INF],len; int n,arr[INF],in[INF],vis[MIN]; int wa[INF],wb[INF],wv[INF],wn[INF]; int cmp(int *r,int ra,int rb,int l){ return r[ra] == r[rb] && r[ra+l] == r[rb+l]; } void Da(int *r,int n,int m) { int i,j,k,p,*t; int *x = wa,*y = wb; for (i = 0; i < m; ++i) wn[i] = 0; for (i = 0; i < n; ++i) wn[x[i]=r[i]]++; for (i = 1; i < m; ++i) wn[i] += wn[i-1]; for (i = n - 1; i >= 0; --i) sa[--wn[x[i]]] = i; for (j = 1,p = 1; p < n; j *= 2,m = p) { for (p = 0,i = n - j; i < n; ++i) y[p++] = i; for (i = 0; i < n; ++i) if (sa[i] >= j) y[p++] = sa[i] - j; for (i = 0; i < n; ++i) wv[i] = x[y[i]]; for (i = 0; i < m; ++i) wn[i] = 0; for (i = 0; i < n; ++i) wn[wv[i]]++; for (i = 1; i < m; ++i) wn[i] += wn[i-1]; for (i = n - 1; i >= 0; --i) sa[--wn[wv[i]]] = y[i]; t = x,x = y,y = t,p = 1; for (x[sa[0]] = 0,i = 1; i < n; ++i) x[sa[i]] = cmp(y,sa[i-1],sa[i],j) ? p - 1 : p++; } } void CalHeight(int *r,int n) { int i,j,k = 0; for (i = 1; i <= n; ++i) ra[sa[i]] = i; for (i = 0; i < n; h[ra[i++]] = k) for (k ? k-- : 0,j = sa[ra[i]-1]; r[i+k] == r[j+k]; k++); } void Solve(int *r,int len,int *ans,int minlen) { int i,j,num,beg,cnt,flag; int low,high,mid,half; maxlen = 0; low = 1,high = minlen; half = n / 2.0; while (low <= high) { cnt = 0; mid = low + (high - low) / 2; for (i = 1; i <= len; ++i){ if (h[i] < mid) { memset(vis,0,sizeof(vis)); vis[in[sa[i]]] = 1,flag = 0; //flag标记使得当前开始的答案只记录一次 num = 1,beg = sa[i+1]; //beg从哪个地方开始,num表示子串出现在几个字符串中 } else{ j = sa[i]; if (vis[in[j]] == 0 && arr[j] != '$') num++,vis[in[j]] = 1; if (num > half && !flag) ans[cnt++] = beg,flag = 1; } } if (cnt != 0) { //符合情况 maxlen = mid; tot = cnt; low = mid + 1; } else high = mid - 1; } } int main() { int i,j,k,c; scanf("%d",&n); do{ minlen = 0,tot = 0,c = 0; memset(in,0,sizeof(in)); memset(arr,0,sizeof(arr)); memset(ra,0,sizeof(ra)); for (i = 0; i < n; ++i) { scanf("%s",str[i]); if (strlen(str[i]) > minlen); minlen = strlen(str[i]); } for (i = k = 0; i < n; ++i) { for (j = 0; str[i][j]; ++j) arr[k] = str[i][j],in[k++] = i; c = (c + 1) % 96,arr[k++] = c; } len = k - 1; Da(arr,len + 1,150); CalHeight(arr,len); Solve(arr,len,ans,minlen); if (n == 1) printf("%s\n",str[0]); else if (maxlen == 0) printf("?\n"); else { for (i = 0; i < tot; ++i) for (j = ans[i]; j < ans[i] + maxlen; ++j) printf(j == (ans[i] + maxlen - 1) ? "%c\n":"%c",arr[j]); } scanf("%d",&n); if (n != 0) printf("\n"); }while (n); }
本文ZeroClock原创,但可以转载,因为我们是兄弟。